r187935 - Support for double width characters.

Daniel Jasper djasper at google.com
Wed Aug 7 19:10:46 PDT 2013


With this patch, clang-format tests assert on Mac. The underlying issue is
that tab characters are not-printable. Thus, the corresponding function on
mac (somewhat correctly) return -1 and columnWidth asserts. I think we
should do two things:
1) Move the generic columnWidth location into an accessible location. Then
directly use that from clang-format. We do not want clang-format to output
different layouts based on the platform.
2) We probably should not count tab characters as 0-width (as I think we
know do). Can we instead use the configured IndentWidth?

Cheers,
Daniel


On Wed, Aug 7, 2013 at 4:29 PM, Alexander Kornienko <alexfh at google.com>wrote:

> Author: alexfh
> Date: Wed Aug  7 18:29:01 2013
> New Revision: 187935
>
> URL: http://llvm.org/viewvc/llvm-project?rev=187935&view=rev
> Log:
> Support for double width characters.
>
> Summary: Only works for UTF-8-encoded files.
>
> Reviewers: djasper
>
> Reviewed By: djasper
>
> CC: cfe-commits, klimek
>
> Differential Revision: http://llvm-reviews.chandlerc.com/D1311
>
> Modified:
>     cfe/trunk/lib/Format/BreakableToken.cpp
>     cfe/trunk/unittests/Format/FormatTest.cpp
>
> Modified: cfe/trunk/lib/Format/BreakableToken.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/BreakableToken.cpp?rev=187935&r1=187934&r2=187935&view=diff
>
> ==============================================================================
> --- cfe/trunk/lib/Format/BreakableToken.cpp (original)
> +++ cfe/trunk/lib/Format/BreakableToken.cpp Wed Aug  7 18:29:01 2013
> @@ -20,6 +20,7 @@
>  #include "clang/Format/Format.h"
>  #include "llvm/ADT/STLExtras.h"
>  #include "llvm/Support/Debug.h"
> +#include "llvm/Support/Locale.h"
>  #include <algorithm>
>
>  namespace clang {
> @@ -38,6 +39,15 @@ static bool IsBlank(char C) {
>    }
>  }
>
> +static unsigned columnWidth(StringRef Text, encoding::Encoding Encoding) {
> +  if (Encoding == encoding::Encoding_UTF8) {
> +    int ContentWidth = llvm::sys::locale::columnWidth(Text);
> +    if (ContentWidth >= 0)
> +      return ContentWidth;
> +  }
> +  return encoding::getCodePointCount(Text, Encoding);
> +}
> +
>  static BreakableToken::Split getCommentSplit(StringRef Text,
>                                               unsigned ContentStartColumn,
>                                               unsigned ColumnLimit,
> @@ -49,9 +59,12 @@ static BreakableToken::Split getCommentS
>    unsigned MaxSplitBytes = 0;
>
>    for (unsigned NumChars = 0;
> -       NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars)
> -    MaxSplitBytes +=
> +       NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
> +    unsigned NumBytes =
>          encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
> +    NumChars += columnWidth(Text.substr(MaxSplitBytes, NumBytes),
> Encoding);
> +    MaxSplitBytes += NumBytes;
> +  }
>
>    StringRef::size_type SpaceOffset = Text.find_last_of(Blanks,
> MaxSplitBytes);
>    if (SpaceOffset == StringRef::npos ||
> @@ -84,9 +97,8 @@ static BreakableToken::Split getStringSp
>      return BreakableToken::Split(StringRef::npos, 0);
>    if (ColumnLimit <= ContentStartColumn)
>      return BreakableToken::Split(StringRef::npos, 0);
> -  unsigned MaxSplit =
> -      std::min<unsigned>(ColumnLimit - ContentStartColumn,
> -                         encoding::getCodePointCount(Text, Encoding) - 1);
> +  unsigned MaxSplit = std::min<unsigned>(ColumnLimit - ContentStartColumn,
> +                                         columnWidth(Text, Encoding) - 1);
>    StringRef::size_type SpaceOffset = 0;
>    StringRef::size_type SlashOffset = 0;
>    StringRef::size_type WordStartOffset = 0;
> @@ -98,7 +110,7 @@ static BreakableToken::Split getStringSp
>        Chars += Advance;
>      } else {
>        Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
> -      Chars += 1;
> +      Chars += columnWidth(Text.substr(0, Advance), Encoding);
>      }
>
>      if (Chars > MaxSplit)
> @@ -131,7 +143,7 @@ unsigned BreakableSingleLineToken::getLi
>  unsigned BreakableSingleLineToken::getLineLengthAfterSplit(
>      unsigned LineIndex, unsigned Offset, StringRef::size_type Length)
> const {
>    return StartColumn + Prefix.size() + Postfix.size() +
> -         encoding::getCodePointCount(Line.substr(Offset, Length),
> Encoding);
> +         columnWidth(Line.substr(Offset, Length), Encoding);
>  }
>
>  BreakableSingleLineToken::BreakableSingleLineToken(
> @@ -329,8 +341,7 @@ unsigned BreakableBlockComment::getLineC
>  unsigned BreakableBlockComment::getLineLengthAfterSplit(
>      unsigned LineIndex, unsigned Offset, StringRef::size_type Length)
> const {
>    return getContentStartColumn(LineIndex, Offset) +
> -         encoding::getCodePointCount(Lines[LineIndex].substr(Offset,
> Length),
> -                                     Encoding) +
> +         columnWidth(Lines[LineIndex].substr(Offset, Length), Encoding) +
>           // The last line gets a "*/" postfix.
>           (LineIndex + 1 == Lines.size() ? 2 : 0);
>  }
>
> Modified: cfe/trunk/unittests/Format/FormatTest.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/unittests/Format/FormatTest.cpp?rev=187935&r1=187934&r2=187935&view=diff
>
> ==============================================================================
> --- cfe/trunk/unittests/Format/FormatTest.cpp (original)
> +++ cfe/trunk/unittests/Format/FormatTest.cpp Wed Aug  7 18:29:01 2013
> @@ -5704,15 +5704,15 @@ TEST_F(FormatTest, CountsUTF8CharactersP
>    verifyFormat("\"Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
> пору...\"",
>                 getLLVMStyleWithColumns(35));
>    verifyFormat("\"一 二 三 å›› 五 å…­ 七 å…« ä¹  å  \"",
> -               getLLVMStyleWithColumns(21));
> +               getLLVMStyleWithColumns(31));
>    verifyFormat("// Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
> пору...",
>                 getLLVMStyleWithColumns(36));
>    verifyFormat("// 一 二 三 å›› 五 å…­ 七 å…« ä¹  å  ",
> -               getLLVMStyleWithColumns(22));
> +               getLLVMStyleWithColumns(32));
>    verifyFormat("/* Однажды в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
> пору... */",
>                 getLLVMStyleWithColumns(39));
>    verifyFormat("/* 一 二 三 å›› 五 å…­ 七 å…« ä¹  å   */",
> -               getLLVMStyleWithColumns(25));
> +               getLLVMStyleWithColumns(35));
>  }
>
>  TEST_F(FormatTest, SplitsUTF8Strings) {
> @@ -5723,11 +5723,12 @@ TEST_F(FormatTest, SplitsUTF8Strings) {
>        "\"пору,\"",
>        format("\"Однажды, в Ñ Ñ‚ÑƒÐ´Ñ‘Ð½ÑƒÑŽ зимнюю
> пору,\"",
>               getLLVMStyleWithColumns(13)));
> -  EXPECT_EQ("\"一 二 三 四 \"\n"
> -            "\"五 六 七 八 \"\n"
> -            "\"ä¹  å  \"",
> -            format("\"一 二 三 å›› 五 å…­ 七 å…« ä¹  å  \"",
> -                   getLLVMStyleWithColumns(10)));
> +  EXPECT_EQ("\"一 二 三 \"\n"
> +            "\"四 五六 \"\n"
> +            "\"七 å…« ä¹  \"\n"
> +            "\"å  \"",
> +            format("\"一 二 三 å›› 五六 七 å…« ä¹  å  \"",
> +                   getLLVMStyleWithColumns(11)));
>  }
>
>  TEST_F(FormatTest, SplitsUTF8LineComments) {
> @@ -5739,9 +5740,9 @@ TEST_F(FormatTest, SplitsUTF8LineComment
>                     getLLVMStyleWithColumns(13)));
>    EXPECT_EQ("// 一二三\n"
>              "// 四五六七\n"
> -            "// å…«\n"
> -            "// ä¹  å  ",
> -            format("// 一二三 四五六七 å…«  ä¹  å  ",
> getLLVMStyleWithColumns(6)));
> +            "// å…«  ä¹ \n"
> +            "// å  ",
> +            format("// 一二三 四五六七 å…«  ä¹  å  ",
> getLLVMStyleWithColumns(9)));
>  }
>
>  TEST_F(FormatTest, SplitsUTF8BlockComments) {
> @@ -5758,16 +5759,17 @@ TEST_F(FormatTest, SplitsUTF8BlockCommen
>                     getLLVMStyleWithColumns(13)));
>    EXPECT_EQ("/* 一二三\n"
>              " * 四五六七\n"
> -            " * å…«\n"
> -            " * ä¹  å  \n"
> -            " */",
> -            format("/* 一二三 四五六七 å…«  ä¹  å   */",
> getLLVMStyleWithColumns(6)));
> +            " * å…«  ä¹ \n"
> +            " * å    */",
> +            format("/* 一二三 四五六七 å…«  ä¹  å    */",
> getLLVMStyleWithColumns(9)));
>    EXPECT_EQ("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯\n"
>              " * 𠕓𠕪𠕥𠕖\n"
>              " * 𠖀𠕿𠕱-ð Ÿ  */",
>              format("/* 𠓣𠓮𠓼𠓽 𠔣𠔬𠔲𠔯 𠕓𠕪𠕥𠕖
> 𠖀𠕿𠕱-ð Ÿ  */", getLLVMStyleWithColumns(12)));
>  }
>
> +#endif // _MSC_VER
> +
>  TEST_F(FormatTest, FormatsWithWebKitStyle) {
>    FormatStyle Style = getWebKitStyle();
>
> @@ -5847,7 +5849,5 @@ TEST_F(FormatTest, FormatsWithWebKitStyl
>              format("if (aaaaaaaaaaaaaaa || bbbbbbbbbbbbbbb) { i++; }",
> Style));
>  }
>
> -#endif
> -
>  } // end namespace tooling
>  } // end namespace clang
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20130807/d262a08d/attachment.html>


More information about the cfe-commits mailing list