[PATCH] D138518: Update the list of double width codepoints
Corentin Jabot via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 22 11:32:19 PST 2022
cor3ntin created this revision.
Herald added a subscriber: hiraditya.
Herald added a project: All.
cor3ntin requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.
All codepoints whose East Asian Width property is Wide or Fullwidth
are considered double width, as are emojis and
symbols commonly rendered as emoji.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D138518
Files:
llvm/lib/Support/Unicode.cpp
Index: llvm/lib/Support/Unicode.cpp
===================================================================
--- llvm/lib/Support/Unicode.cpp
+++ llvm/lib/Support/Unicode.cpp
@@ -300,8 +300,7 @@
/// * 0 for non-spacing and enclosing combining marks;
/// * 2 for CJK characters excluding halfwidth forms;
/// * 1 for all remaining characters.
-static inline int charWidth(int UCS)
-{
+static inline int charWidth(int UCS) {
if (!isPrintable(UCS))
return ErrorNonPrintableCharacter;
@@ -430,26 +429,45 @@
if (CombiningCharacters.contains(UCS))
return 0;
+ // A codepoint is considered double width if it has the property
+ // East_Asian_Width=F|W, or if it belongs to one of these blocks:
+ // * Misc Symbols and Pictographs (U+1F300...U+1F5FF)
+ // * Supplemental Symbols and Pictographs (U+1F900...U+1F9FF)
static const UnicodeCharRange DoubleWidthCharacterRanges[] = {
- // Hangul Jamo
- { 0x1100, 0x11FF },
- // Deprecated fullwidth angle brackets
- { 0x2329, 0x232A },
- // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi
- // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE)
- { 0x2E80, 0x303E }, { 0x3040, 0xA4CF },
- // Hangul
- { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB },
- // CJK Unified Ideographs
- { 0xF900, 0xFAFF },
- // Vertical forms
- { 0xFE10, 0xFE19 },
- // CJK Compatibility Forms + Small Form Variants
- { 0xFE30, 0xFE6F },
- // Fullwidth forms
- { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 },
- // CJK Unified Ideographs
- { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F }
+ {0x1100, 0x115F}, {0x231A, 0x231B}, {0x2329, 0x232A},
+ {0x23E9, 0x23EC}, {0x23F0, 0x23F0}, {0x23F3, 0x23F3},
+ {0x25FD, 0x25FE}, {0x2614, 0x2615}, {0x2648, 0x2653},
+ {0x267F, 0x267F}, {0x2693, 0x2693}, {0x26A1, 0x26A1},
+ {0x26AA, 0x26AB}, {0x26BD, 0x26BE}, {0x26C4, 0x26C5},
+ {0x26CE, 0x26CE}, {0x26D4, 0x26D4}, {0x26EA, 0x26EA},
+ {0x26F2, 0x26F3}, {0x26F5, 0x26F5}, {0x26FA, 0x26FA},
+ {0x26FD, 0x26FD}, {0x2705, 0x2705}, {0x270A, 0x270B},
+ {0x2728, 0x2728}, {0x274C, 0x274C}, {0x274E, 0x274E},
+ {0x2753, 0x2755}, {0x2757, 0x2757}, {0x2795, 0x2797},
+ {0x27B0, 0x27B0}, {0x27BF, 0x27BF}, {0x2B1B, 0x2B1C},
+ {0x2B50, 0x2B50}, {0x2B55, 0x2B55}, {0x2E80, 0x2E99},
+ {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB},
+ {0x3000, 0x303E}, {0x3041, 0x3096}, {0x3099, 0x30FF},
+ {0x3105, 0x312F}, {0x3131, 0x318E}, {0x3190, 0x31E3},
+ {0x31F0, 0x321E}, {0x3220, 0x3247}, {0x3250, 0xA48C},
+ {0xA490, 0xA4C6}, {0xA960, 0xA97C}, {0xAC00, 0xD7A3},
+ {0xF900, 0xFAFF}, {0xFE10, 0xFE19}, {0xFE30, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFF01, 0xFF60},
+ {0xFFE0, 0xFFE6}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1},
+ {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
+ {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
+ {0x1B000, 0x1B122}, {0x1B132, 0x1B132}, {0x1B150, 0x1B152},
+ {0x1B155, 0x1B155}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB},
+ {0x1F004, 0x1F004}, {0x1F0CF, 0x1F0CF}, {0x1F18E, 0x1F18E},
+ {0x1F191, 0x1F19A}, {0x1F200, 0x1F202}, {0x1F210, 0x1F23B},
+ {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265},
+ {0x1F300, 0x1F64F}, {0x1F680, 0x1F6C5}, {0x1F6CC, 0x1F6CC},
+ {0x1F6D0, 0x1F6D2}, {0x1F6D5, 0x1F6D7}, {0x1F6DC, 0x1F6DF},
+ {0x1F6EB, 0x1F6EC}, {0x1F6F4, 0x1F6FC}, {0x1F7E0, 0x1F7EB},
+ {0x1F7F0, 0x1F7F0}, {0x1F900, 0x1F9FF}, {0x1FA70, 0x1FA7C},
+ {0x1FA80, 0x1FA88}, {0x1FA90, 0x1FABD}, {0x1FABF, 0x1FAC5},
+ {0x1FACE, 0x1FADB}, {0x1FAE0, 0x1FAE8}, {0x1FAF0, 0x1FAF8},
+ {0x20000, 0x2FFFD}, {0x30000, 0x3FFFD}
};
static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges);
@@ -493,4 +511,3 @@
} // namespace unicode
} // namespace sys
} // namespace llvm
-
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D138518.477263.patch
Type: text/x-patch
Size: 3971 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20221122/bf2d6da8/attachment.bin>
More information about the llvm-commits
mailing list