[clang] 64ab2b1 - Improve handling of static assert messages.
Corentin Jabot via cfe-commits
cfe-commits at lists.llvm.org
Wed Jun 29 05:57:41 PDT 2022
Author: Corentin Jabot
Date: 2022-06-29T14:57:35+02:00
New Revision: 64ab2b1dcc5136a744fcac21d3d2c59e9cce040a
URL: https://github.com/llvm/llvm-project/commit/64ab2b1dcc5136a744fcac21d3d2c59e9cce040a
DIFF: https://github.com/llvm/llvm-project/commit/64ab2b1dcc5136a744fcac21d3d2c59e9cce040a.diff
LOG: Improve handling of static assert messages.
Instead of dumping the string literal (which
quotes it and escape every non-ascii symbol),
we can use the content of the string when it is a
8 byte string.
Wide, UTF-8/UTF-16/32 strings are still completely
escaped, until we clarify how these entities should
behave (cf https://wg21.link/p2361).
`FormatDiagnostic` is modified to escape
non printable characters and invalid UTF-8.
This ensures that unicode characters, spaces and new
lines are properly rendered in static messages.
This make clang more consistent with other implementation
and fixes this tweet
https://twitter.com/jfbastien/status/1298307325443231744 :)
Of note, `PaddingChecker` did print out new lines that were
later removed by the diagnostic printing code.
To be consistent with its tests, the new lines are removed
from the diagnostic.
Unicode tables updated to both use the Unicode definitions
and the Unicode 14.0 data.
U+00AD SOFT HYPHEN is still considered a print character
to match existing practices in terminals, in addition of
being considered a formatting character as per Unicode.
Reviewed By: aaron.ballman, #clang-language-wg
Differential Revision: https://reviews.llvm.org/D108469
Added:
Modified:
clang/docs/ReleaseNotes.rst
clang/lib/Basic/Diagnostic.cpp
clang/lib/Sema/SemaDeclCXX.cpp
clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
clang/test/Lexer/null-character-in-literal.c
clang/test/Misc/diag-special-chars.c
clang/test/Misc/wrong-encoding.c
clang/test/SemaCXX/static-assert.cpp
llvm/include/llvm/Support/Unicode.h
llvm/lib/Support/Unicode.cpp
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3e29987ad2631..99288d0ffac4e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -273,6 +273,8 @@ Improvements to Clang's diagnostics
- When using class templates without arguments, clang now tells developers
that template arguments are missing in certain contexts.
This fixes `Issue 55962 <https://github.com/llvm/llvm-project/issues/55962>`_.
+- Printable Unicode characters within `static_assert` messages are no longer
+ escaped.
Non-comprehensive list of changes in this release
-------------------------------------------------
diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp
index deb398756e373..dbe62ecb50d33 100644
--- a/clang/lib/Basic/Diagnostic.cpp
+++ b/clang/lib/Basic/Diagnostic.cpp
@@ -25,8 +25,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/CrashRecoveryContext.h"
-#include "llvm/Support/Locale.h"
+#include "llvm/Support/Unicode.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -803,6 +804,50 @@ FormatDiagnostic(SmallVectorImpl<char> &OutStr) const {
FormatDiagnostic(Diag.begin(), Diag.end(), OutStr);
}
+/// pushEscapedString - Append Str to the diagnostic buffer,
+/// escaping non-printable characters and ill-formed code unit sequences.
+static void pushEscapedString(StringRef Str, SmallVectorImpl<char> &OutStr) {
+ OutStr.reserve(OutStr.size() + Str.size());
+ auto *Begin = reinterpret_cast<const unsigned char *>(Str.data());
+ llvm::raw_svector_ostream OutStream(OutStr);
+ const unsigned char *End = Begin + Str.size();
+ while (Begin != End) {
+ // ASCII case
+ if (isPrintable(*Begin) || isWhitespace(*Begin)) {
+ OutStream << *Begin;
+ ++Begin;
+ continue;
+ }
+ if (llvm::isLegalUTF8Sequence(Begin, End)) {
+ llvm::UTF32 CodepointValue;
+ llvm::UTF32 *CpPtr = &CodepointValue;
+ const unsigned char *CodepointBegin = Begin;
+ const unsigned char *CodepointEnd =
+ Begin + llvm::getNumBytesForUTF8(*Begin);
+ llvm::ConversionResult Res = llvm::ConvertUTF8toUTF32(
+ &Begin, CodepointEnd, &CpPtr, CpPtr + 1, llvm::strictConversion);
+ (void)Res;
+ assert(
+ llvm::conversionOK == Res &&
+ "the sequence is legal UTF-8 but we couldn't convert it to UTF-32");
+ assert(Begin == CodepointEnd &&
+ "we must be further along in the string now");
+ if (llvm::sys::unicode::isPrintable(CodepointValue) ||
+ llvm::sys::unicode::isFormatting(CodepointValue)) {
+ OutStr.append(CodepointBegin, CodepointEnd);
+ continue;
+ }
+ // Unprintable code point.
+ OutStream << "<U+" << llvm::format_hex_no_prefix(CodepointValue, 4, true)
+ << ">";
+ continue;
+ }
+ // Invalid code unit.
+ OutStream << "<" << llvm::format_hex_no_prefix(*Begin, 2, true) << ">";
+ ++Begin;
+ }
+}
+
void Diagnostic::
FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
SmallVectorImpl<char> &OutStr) const {
@@ -813,11 +858,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
StringRef(DiagStr, DiagEnd - DiagStr).equals("%0") &&
getArgKind(0) == DiagnosticsEngine::ak_std_string) {
const std::string &S = getArgStdStr(0);
- for (char c : S) {
- if (llvm::sys::locale::isPrint(c) || c == '\t') {
- OutStr.push_back(c);
- }
- }
+ pushEscapedString(S, OutStr);
return;
}
@@ -924,7 +965,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
case DiagnosticsEngine::ak_std_string: {
const std::string &S = getArgStdStr(ArgNo);
assert(ModifierLen == 0 && "No modifiers for strings yet");
- OutStr.append(S.begin(), S.end());
+ pushEscapedString(S, OutStr);
break;
}
case DiagnosticsEngine::ak_c_string: {
@@ -934,8 +975,7 @@ FormatDiagnostic(const char *DiagStr, const char *DiagEnd,
// Don't crash if get passed a null pointer by accident.
if (!S)
S = "(null)";
-
- OutStr.append(S, S + strlen(S));
+ pushEscapedString(S, OutStr);
break;
}
// ---- INTEGERS ----
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 7ed338d49a869..8681571eaea42 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -16592,8 +16592,13 @@ Decl *Sema::BuildStaticAssertDeclaration(SourceLocation StaticAssertLoc,
if (!Failed && !Cond) {
SmallString<256> MsgBuffer;
llvm::raw_svector_ostream Msg(MsgBuffer);
- if (AssertMessage)
- AssertMessage->printPretty(Msg, nullptr, getPrintingPolicy());
+ if (AssertMessage) {
+ const auto *MsgStr = cast<StringLiteral>(AssertMessage);
+ if (MsgStr->isAscii())
+ Msg << '"' << MsgStr->getString() << '"';
+ else
+ MsgStr->printPretty(Msg, nullptr, getPrintingPolicy());
+ }
Expr *InnerCond = nullptr;
std::string InnerCondDescription;
diff --git a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
index 40472ccfe7e66..cddf206728b1a 100644
--- a/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
+++ b/clang/lib/StaticAnalyzer/Checkers/PaddingChecker.cpp
@@ -332,10 +332,10 @@ class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
}
Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
- << OptimalPad.getQuantity() << " is optimal). \n"
- << "Optimal fields order: \n";
+ << OptimalPad.getQuantity() << " is optimal). "
+ << "Optimal fields order: ";
for (const auto *FD : OptimalFieldsOrder)
- Os << FD->getName() << ", \n";
+ Os << FD->getName() << ", ";
Os << "consider reordering the fields or adding explicit padding "
"members.";
diff --git a/clang/test/Lexer/null-character-in-literal.c b/clang/test/Lexer/null-character-in-literal.c
index a479547536762..79e5f251e84be 100644
Binary files a/clang/test/Lexer/null-character-in-literal.c and b/clang/test/Lexer/null-character-in-literal.c
diff er
diff --git a/clang/test/Misc/diag-special-chars.c b/clang/test/Misc/diag-special-chars.c
index e656a09b42799..d514c596d842a 100644
--- a/clang/test/Misc/diag-special-chars.c
+++ b/clang/test/Misc/diag-special-chars.c
@@ -5,7 +5,7 @@
// marker character for diagnostic printing. Ensure diagnostics involving
// this character does not cause problems with the diagnostic printer.
#error Hi Bye
-//expected-error at -1 {{Hi Bye}}
+// expected-error at -1 {{Hi <U+007F> Bye}}
-// CHECK: error: Hi Bye
+// CHECK: error: Hi <U+007F> Bye
// CHECK: #error Hi <U+007F> Bye
diff --git a/clang/test/Misc/wrong-encoding.c b/clang/test/Misc/wrong-encoding.c
index 9a303ec7e7e9e..4d8aa94ffffb2 100644
--- a/clang/test/Misc/wrong-encoding.c
+++ b/clang/test/Misc/wrong-encoding.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -fsyntax-only -Wno-unused-value %s 2>&1 | FileCheck -strict-whitespace %s
+// RUN: %clang_cc1 -fsyntax-only -Wno-unused-value %s 2>&1 | FileCheck %s
// REQUIRES: asserts
void foo(void) {
@@ -13,7 +13,7 @@ void foo(void) {
// CHECK: {{^ \^~~~}}
"xxé¿¿¿d";
-// CHECK: {{^ "xx<U\+9FFF><BF>d";}}
+// CHECK: {{^ "xxé¿¿<BF>d";}}
// CHECK: {{^ \^~~~}}
"xxé¿bcd";
diff --git a/clang/test/SemaCXX/static-assert.cpp b/clang/test/SemaCXX/static-assert.cpp
index affc0c32a7a89..f6ac64b49d95d 100644
--- a/clang/test/SemaCXX/static-assert.cpp
+++ b/clang/test/SemaCXX/static-assert.cpp
@@ -30,11 +30,25 @@ S<int> s2;
static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}}
static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}}
-// FIXME: render this as u8"\u03A9"
+
static_assert(false, u8"Ω"); // expected-error {{static_assert failed u8"\316\251"}}
static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}}
static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}}
+static_assert(false, R"(a
+\tb
+c
+)"); // expected-error at -3 {{static_assert failed "a\n\tb\nc\n"}}
+
+static_assert(false, "\u0080\u0081\u0082\u0083\u0099\u009A\u009B\u009C\u009D\u009E\u009F");
+// expected-error at -1 {{static_assert failed "<U+0080><U+0081><U+0082><U+0083><U+0099><U+009A><U+009B><U+009C><U+009D><U+009E><U+009F>"}}
+
+//! Contains RTL/LTR marks
+static_assert(false, "\u200Eabc\u200Fdef\u200Fgh"); // expected-error {{static_assert failed "âabcâdefâgh"}}
+
+//! Contains ZWJ/regional indicators
+static_assert(false, "ð³ï¸âð ð´ó §ó ¢ó ¥ó ®ó §ó ¿ ðªðº"); // expected-error {{static_assert failed "ð³ï¸âð ð´ó §ó ¢ó ¥ó ®ó §ó ¿ ðªðº"}}
+
template<typename T> struct AlwaysFails {
// Only give one error here.
static_assert(false, ""); // expected-error {{static_assert failed}}
diff --git a/llvm/include/llvm/Support/Unicode.h b/llvm/include/llvm/Support/Unicode.h
index eb811daeee345..729775431e16f 100644
--- a/llvm/include/llvm/Support/Unicode.h
+++ b/llvm/include/llvm/Support/Unicode.h
@@ -34,19 +34,13 @@ enum ColumnWidthErrors {
/// terminal, so we define the semantic that should be suitable for generic case
/// of a terminal capable to output Unicode characters.
///
-/// All characters from the Unicode code point range are considered printable
-/// except for:
-/// * C0 and C1 control character ranges;
-/// * default ignorable code points as per 5.21 of
-/// http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf
-/// except for U+00AD SOFT HYPHEN, as it's actually displayed on most
-/// terminals;
-/// * format characters (category = Cf);
-/// * surrogates (category = Cs);
-/// * unassigned characters (category = Cn).
+/// Printable codepoints are those in the categories L, M, N, P, S and Zs
/// \return true if the character is considered printable.
bool isPrintable(int UCS);
+// Formatting codepoints are codepoints in the Cf category.
+bool isFormatting(int UCS);
+
/// Gets the number of positions the UTF8-encoded \p Text is likely to occupy
/// when output on a terminal ("character width"). This depends on the
/// implementation of the terminal, and there's no standard definition of
diff --git a/llvm/lib/Support/Unicode.cpp b/llvm/lib/Support/Unicode.cpp
index bb6e75555b4c9..1037103030947 100644
--- a/llvm/lib/Support/Unicode.cpp
+++ b/llvm/lib/Support/Unicode.cpp
@@ -19,197 +19,271 @@ namespace llvm {
namespace sys {
namespace unicode {
+/// Unicode code points of the categories L, M, N, P, S and Zs are considered
+/// printable.
+/// In addition, U+00AD SOFT HYPHEN is also considered printable, as
+/// it's actually displayed on most terminals. \return true if the character is
+/// considered printable.
bool isPrintable(int UCS) {
- // Sorted list of non-overlapping intervals of code points that are not
- // supposed to be printable.
- static const UnicodeCharRange NonPrintableRanges[] = {
- { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F },
- { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B },
- { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 },
- { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 },
- { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF },
- { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D },
- { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C },
- { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F },
- { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F },
- { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF },
- { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 },
- { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 },
- { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB },
- { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 },
- { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 },
- { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E },
- { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 },
- { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B },
- { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A },
- { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D },
- { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 },
- { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 },
- { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB },
- { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF },
- { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 },
- { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 },
- { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 },
- { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A },
- { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E },
- { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 },
- { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 },
- { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 },
- { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD },
- { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF },
- { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 },
- { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 },
- { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C },
- { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 },
- { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 },
- { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 },
- { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 },
- { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 },
- { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD },
- { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 },
- { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D },
- { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 },
- { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F },
- { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 },
- { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 },
- { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 },
- { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 },
- { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E },
- { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 },
- { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 },
- { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 },
- { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC },
- { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 },
- { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB },
- { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 },
- { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD },
- { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC },
- { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 },
- { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 },
- { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F },
- { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF },
- { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 },
- { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C },
- { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF },
- { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D },
- { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F },
- { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F },
- { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF },
- { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F },
- { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF },
- { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F },
- { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F },
- { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF },
- { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F },
- { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F },
- { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F },
- { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C },
- { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF },
- { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F },
- { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 },
- { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E },
- { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 },
- { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 },
- { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F },
- { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 },
- { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF },
- { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF },
- { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 },
- { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F },
- { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 },
- { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E },
- { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 },
- { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF },
- { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 },
- { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A },
- { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
- { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 },
- { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F },
- { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F },
- { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF },
- { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F },
- { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F },
- { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F },
- { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD },
- { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E },
- { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD },
- { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F },
- { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA },
- { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 },
- { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF },
- { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF },
- { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F },
- { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C },
- { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F },
- { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 },
- { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF },
- { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F },
- { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F },
- { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 },
- { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 },
- { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF },
- { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF },
- { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B },
- { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F },
- { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 },
- { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F },
- { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F },
- { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E },
- { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F },
- { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 },
- { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E },
- { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E },
- { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD },
- { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B },
- { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 },
- { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F },
- { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 },
- { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F },
- { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F },
- { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF },
- { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F },
- { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF },
- { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F },
- { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF },
- { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF },
- { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 },
- { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF },
- { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 },
- { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 },
- { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA },
- { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 },
- { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D },
- { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 },
- { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 },
- { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 },
- { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 },
- { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 },
- { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 },
- { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C },
- { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 },
- { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C },
- { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 },
- { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 },
- { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F },
- { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 },
- { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF },
- { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 },
- { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF },
- { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F },
- { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F },
- { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F },
- { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F },
- { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF },
- { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 },
- { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F },
- { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F },
- { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF },
- { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 },
- { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF }
- };
- static const UnicodeCharSet NonPrintables(NonPrintableRanges);
+ // https://unicode.org/Public/14.0.0/ucdxml/
+ static const UnicodeCharRange PrintableRanges[] = {
+ {0x0020, 0x007E}, {0x00A0, 0x00AC}, {0x00AE, 0x0377},
+ {0x037A, 0x037F}, {0x0384, 0x038A}, {0x038C, 0x038C},
+ {0x038E, 0x03A1}, {0x03A3, 0x052F}, {0x0531, 0x0556},
+ {0x0559, 0x058A}, {0x058D, 0x058F}, {0x0591, 0x05C7},
+ {0x05D0, 0x05EA}, {0x05EF, 0x05F4}, {0x0606, 0x061B},
+ {0x061D, 0x06DC}, {0x06DE, 0x070D}, {0x0710, 0x074A},
+ {0x074D, 0x07B1}, {0x07C0, 0x07FA}, {0x07FD, 0x082D},
+ {0x0830, 0x083E}, {0x0840, 0x085B}, {0x085E, 0x085E},
+ {0x0860, 0x086A}, {0x0870, 0x088E}, {0x0898, 0x08E1},
+ {0x08E3, 0x0983}, {0x0985, 0x098C}, {0x098F, 0x0990},
+ {0x0993, 0x09A8}, {0x09AA, 0x09B0}, {0x09B2, 0x09B2},
+ {0x09B6, 0x09B9}, {0x09BC, 0x09C4}, {0x09C7, 0x09C8},
+ {0x09CB, 0x09CE}, {0x09D7, 0x09D7}, {0x09DC, 0x09DD},
+ {0x09DF, 0x09E3}, {0x09E6, 0x09FE}, {0x0A01, 0x0A03},
+ {0x0A05, 0x0A0A}, {0x0A0F, 0x0A10}, {0x0A13, 0x0A28},
+ {0x0A2A, 0x0A30}, {0x0A32, 0x0A33}, {0x0A35, 0x0A36},
+ {0x0A38, 0x0A39}, {0x0A3C, 0x0A3C}, {0x0A3E, 0x0A42},
+ {0x0A47, 0x0A48}, {0x0A4B, 0x0A4D}, {0x0A51, 0x0A51},
+ {0x0A59, 0x0A5C}, {0x0A5E, 0x0A5E}, {0x0A66, 0x0A76},
+ {0x0A81, 0x0A83}, {0x0A85, 0x0A8D}, {0x0A8F, 0x0A91},
+ {0x0A93, 0x0AA8}, {0x0AAA, 0x0AB0}, {0x0AB2, 0x0AB3},
+ {0x0AB5, 0x0AB9}, {0x0ABC, 0x0AC5}, {0x0AC7, 0x0AC9},
+ {0x0ACB, 0x0ACD}, {0x0AD0, 0x0AD0}, {0x0AE0, 0x0AE3},
+ {0x0AE6, 0x0AF1}, {0x0AF9, 0x0AFF}, {0x0B01, 0x0B03},
+ {0x0B05, 0x0B0C}, {0x0B0F, 0x0B10}, {0x0B13, 0x0B28},
+ {0x0B2A, 0x0B30}, {0x0B32, 0x0B33}, {0x0B35, 0x0B39},
+ {0x0B3C, 0x0B44}, {0x0B47, 0x0B48}, {0x0B4B, 0x0B4D},
+ {0x0B55, 0x0B57}, {0x0B5C, 0x0B5D}, {0x0B5F, 0x0B63},
+ {0x0B66, 0x0B77}, {0x0B82, 0x0B83}, {0x0B85, 0x0B8A},
+ {0x0B8E, 0x0B90}, {0x0B92, 0x0B95}, {0x0B99, 0x0B9A},
+ {0x0B9C, 0x0B9C}, {0x0B9E, 0x0B9F}, {0x0BA3, 0x0BA4},
+ {0x0BA8, 0x0BAA}, {0x0BAE, 0x0BB9}, {0x0BBE, 0x0BC2},
+ {0x0BC6, 0x0BC8}, {0x0BCA, 0x0BCD}, {0x0BD0, 0x0BD0},
+ {0x0BD7, 0x0BD7}, {0x0BE6, 0x0BFA}, {0x0C00, 0x0C0C},
+ {0x0C0E, 0x0C10}, {0x0C12, 0x0C28}, {0x0C2A, 0x0C39},
+ {0x0C3C, 0x0C44}, {0x0C46, 0x0C48}, {0x0C4A, 0x0C4D},
+ {0x0C55, 0x0C56}, {0x0C58, 0x0C5A}, {0x0C5D, 0x0C5D},
+ {0x0C60, 0x0C63}, {0x0C66, 0x0C6F}, {0x0C77, 0x0C8C},
+ {0x0C8E, 0x0C90}, {0x0C92, 0x0CA8}, {0x0CAA, 0x0CB3},
+ {0x0CB5, 0x0CB9}, {0x0CBC, 0x0CC4}, {0x0CC6, 0x0CC8},
+ {0x0CCA, 0x0CCD}, {0x0CD5, 0x0CD6}, {0x0CDD, 0x0CDE},
+ {0x0CE0, 0x0CE3}, {0x0CE6, 0x0CEF}, {0x0CF1, 0x0CF2},
+ {0x0D00, 0x0D0C}, {0x0D0E, 0x0D10}, {0x0D12, 0x0D44},
+ {0x0D46, 0x0D48}, {0x0D4A, 0x0D4F}, {0x0D54, 0x0D63},
+ {0x0D66, 0x0D7F}, {0x0D81, 0x0D83}, {0x0D85, 0x0D96},
+ {0x0D9A, 0x0DB1}, {0x0DB3, 0x0DBB}, {0x0DBD, 0x0DBD},
+ {0x0DC0, 0x0DC6}, {0x0DCA, 0x0DCA}, {0x0DCF, 0x0DD4},
+ {0x0DD6, 0x0DD6}, {0x0DD8, 0x0DDF}, {0x0DE6, 0x0DEF},
+ {0x0DF2, 0x0DF4}, {0x0E01, 0x0E3A}, {0x0E3F, 0x0E5B},
+ {0x0E81, 0x0E82}, {0x0E84, 0x0E84}, {0x0E86, 0x0E8A},
+ {0x0E8C, 0x0EA3}, {0x0EA5, 0x0EA5}, {0x0EA7, 0x0EBD},
+ {0x0EC0, 0x0EC4}, {0x0EC6, 0x0EC6}, {0x0EC8, 0x0ECD},
+ {0x0ED0, 0x0ED9}, {0x0EDC, 0x0EDF}, {0x0F00, 0x0F47},
+ {0x0F49, 0x0F6C}, {0x0F71, 0x0F97}, {0x0F99, 0x0FBC},
+ {0x0FBE, 0x0FCC}, {0x0FCE, 0x0FDA}, {0x1000, 0x10C5},
+ {0x10C7, 0x10C7}, {0x10CD, 0x10CD}, {0x10D0, 0x1248},
+ {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258},
+ {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D},
+ {0x1290, 0x12B0}, {0x12B2, 0x12B5}, {0x12B8, 0x12BE},
+ {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6},
+ {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A},
+ {0x135D, 0x137C}, {0x1380, 0x1399}, {0x13A0, 0x13F5},
+ {0x13F8, 0x13FD}, {0x1400, 0x169C}, {0x16A0, 0x16F8},
+ {0x1700, 0x1715}, {0x171F, 0x1736}, {0x1740, 0x1753},
+ {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1772, 0x1773},
+ {0x1780, 0x17DD}, {0x17E0, 0x17E9}, {0x17F0, 0x17F9},
+ {0x1800, 0x180D}, {0x180F, 0x1819}, {0x1820, 0x1878},
+ {0x1880, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E},
+ {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1940, 0x1940},
+ {0x1944, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
+ {0x19B0, 0x19C9}, {0x19D0, 0x19DA}, {0x19DE, 0x1A1B},
+ {0x1A1E, 0x1A5E}, {0x1A60, 0x1A7C}, {0x1A7F, 0x1A89},
+ {0x1A90, 0x1A99}, {0x1AA0, 0x1AAD}, {0x1AB0, 0x1ACE},
+ {0x1B00, 0x1B4C}, {0x1B50, 0x1B7E}, {0x1B80, 0x1BF3},
+ {0x1BFC, 0x1C37}, {0x1C3B, 0x1C49}, {0x1C4D, 0x1C88},
+ {0x1C90, 0x1CBA}, {0x1CBD, 0x1CC7}, {0x1CD0, 0x1CFA},
+ {0x1D00, 0x1F15}, {0x1F18, 0x1F1D}, {0x1F20, 0x1F45},
+ {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59},
+ {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D},
+ {0x1F80, 0x1FB4}, {0x1FB6, 0x1FC4}, {0x1FC6, 0x1FD3},
+ {0x1FD6, 0x1FDB}, {0x1FDD, 0x1FEF}, {0x1FF2, 0x1FF4},
+ {0x1FF6, 0x1FFE}, {0x2000, 0x200A}, {0x2010, 0x2027},
+ {0x202F, 0x205F}, {0x2070, 0x2071}, {0x2074, 0x208E},
+ {0x2090, 0x209C}, {0x20A0, 0x20C0}, {0x20D0, 0x20F0},
+ {0x2100, 0x218B}, {0x2190, 0x2426}, {0x2440, 0x244A},
+ {0x2460, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2CF3},
+ {0x2CF9, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D},
+ {0x2D30, 0x2D67}, {0x2D6F, 0x2D70}, {0x2D7F, 0x2D96},
+ {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6},
+ {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE},
+ {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2DE0, 0x2E5D},
+ {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3}, {0x2F00, 0x2FD5},
+ {0x2FF0, 0x2FFB}, {0x3000, 0x303F}, {0x3041, 0x3096},
+ {0x3099, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E},
+ {0x3190, 0x31E3}, {0x31F0, 0x321E}, {0x3220, 0xA48C},
+ {0xA490, 0xA4C6}, {0xA4D0, 0xA62B}, {0xA640, 0xA6F7},
+ {0xA700, 0xA7CA}, {0xA7D0, 0xA7D1}, {0xA7D3, 0xA7D3},
+ {0xA7D5, 0xA7D9}, {0xA7F2, 0xA82C}, {0xA830, 0xA839},
+ {0xA840, 0xA877}, {0xA880, 0xA8C5}, {0xA8CE, 0xA8D9},
+ {0xA8E0, 0xA953}, {0xA95F, 0xA97C}, {0xA980, 0xA9CD},
+ {0xA9CF, 0xA9D9}, {0xA9DE, 0xA9FE}, {0xAA00, 0xAA36},
+ {0xAA40, 0xAA4D}, {0xAA50, 0xAA59}, {0xAA5C, 0xAAC2},
+ {0xAADB, 0xAAF6}, {0xAB01, 0xAB06}, {0xAB09, 0xAB0E},
+ {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E},
+ {0xAB30, 0xAB6B}, {0xAB70, 0xABED}, {0xABF0, 0xABF9},
+ {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
+ {0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06},
+ {0xFB13, 0xFB17}, {0xFB1D, 0xFB36}, {0xFB38, 0xFB3C},
+ {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
+ {0xFB46, 0xFBC2}, {0xFBD3, 0xFD8F}, {0xFD92, 0xFDC7},
+ {0xFDCF, 0xFDCF}, {0xFDF0, 0xFE19}, {0xFE20, 0xFE52},
+ {0xFE54, 0xFE66}, {0xFE68, 0xFE6B}, {0xFE70, 0xFE74},
+ {0xFE76, 0xFEFC}, {0xFF01, 0xFFBE}, {0xFFC2, 0xFFC7},
+ {0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC},
+ {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD},
+ {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A},
+ {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D},
+ {0x10080, 0x100FA}, {0x10100, 0x10102}, {0x10107, 0x10133},
+ {0x10137, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0},
+ {0x101D0, 0x101FD}, {0x10280, 0x1029C}, {0x102A0, 0x102D0},
+ {0x102E0, 0x102FB}, {0x10300, 0x10323}, {0x1032D, 0x1034A},
+ {0x10350, 0x1037A}, {0x10380, 0x1039D}, {0x1039F, 0x103C3},
+ {0x103C8, 0x103D5}, {0x10400, 0x1049D}, {0x104A0, 0x104A9},
+ {0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527},
+ {0x10530, 0x10563}, {0x1056F, 0x1057A}, {0x1057C, 0x1058A},
+ {0x1058C, 0x10592}, {0x10594, 0x10595}, {0x10597, 0x105A1},
+ {0x105A3, 0x105B1}, {0x105B3, 0x105B9}, {0x105BB, 0x105BC},
+ {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767},
+ {0x10780, 0x10785}, {0x10787, 0x107B0}, {0x107B2, 0x107BA},
+ {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
+ {0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855},
+ {0x10857, 0x1089E}, {0x108A7, 0x108AF}, {0x108E0, 0x108F2},
+ {0x108F4, 0x108F5}, {0x108FB, 0x1091B}, {0x1091F, 0x10939},
+ {0x1093F, 0x1093F}, {0x10980, 0x109B7}, {0x109BC, 0x109CF},
+ {0x109D2, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A13},
+ {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A38, 0x10A3A},
+ {0x10A3F, 0x10A48}, {0x10A50, 0x10A58}, {0x10A60, 0x10A9F},
+ {0x10AC0, 0x10AE6}, {0x10AEB, 0x10AF6}, {0x10B00, 0x10B35},
+ {0x10B39, 0x10B55}, {0x10B58, 0x10B72}, {0x10B78, 0x10B91},
+ {0x10B99, 0x10B9C}, {0x10BA9, 0x10BAF}, {0x10C00, 0x10C48},
+ {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10CFA, 0x10D27},
+ {0x10D30, 0x10D39}, {0x10E60, 0x10E7E}, {0x10E80, 0x10EA9},
+ {0x10EAB, 0x10EAD}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F27},
+ {0x10F30, 0x10F59}, {0x10F70, 0x10F89}, {0x10FB0, 0x10FCB},
+ {0x10FE0, 0x10FF6}, {0x11000, 0x1104D}, {0x11052, 0x11075},
+ {0x1107F, 0x110BC}, {0x110BE, 0x110C2}, {0x110D0, 0x110E8},
+ {0x110F0, 0x110F9}, {0x11100, 0x11134}, {0x11136, 0x11147},
+ {0x11150, 0x11176}, {0x11180, 0x111DF}, {0x111E1, 0x111F4},
+ {0x11200, 0x11211}, {0x11213, 0x1123E}, {0x11280, 0x11286},
+ {0x11288, 0x11288}, {0x1128A, 0x1128D}, {0x1128F, 0x1129D},
+ {0x1129F, 0x112A9}, {0x112B0, 0x112EA}, {0x112F0, 0x112F9},
+ {0x11300, 0x11303}, {0x11305, 0x1130C}, {0x1130F, 0x11310},
+ {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333},
+ {0x11335, 0x11339}, {0x1133B, 0x11344}, {0x11347, 0x11348},
+ {0x1134B, 0x1134D}, {0x11350, 0x11350}, {0x11357, 0x11357},
+ {0x1135D, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374},
+ {0x11400, 0x1145B}, {0x1145D, 0x11461}, {0x11480, 0x114C7},
+ {0x114D0, 0x114D9}, {0x11580, 0x115B5}, {0x115B8, 0x115DD},
+ {0x11600, 0x11644}, {0x11650, 0x11659}, {0x11660, 0x1166C},
+ {0x11680, 0x116B9}, {0x116C0, 0x116C9}, {0x11700, 0x1171A},
+ {0x1171D, 0x1172B}, {0x11730, 0x11746}, {0x11800, 0x1183B},
+ {0x118A0, 0x118F2}, {0x118FF, 0x11906}, {0x11909, 0x11909},
+ {0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x11935},
+ {0x11937, 0x11938}, {0x1193B, 0x11946}, {0x11950, 0x11959},
+ {0x119A0, 0x119A7}, {0x119AA, 0x119D7}, {0x119DA, 0x119E4},
+ {0x11A00, 0x11A47}, {0x11A50, 0x11AA2}, {0x11AB0, 0x11AF8},
+ {0x11C00, 0x11C08}, {0x11C0A, 0x11C36}, {0x11C38, 0x11C45},
+ {0x11C50, 0x11C6C}, {0x11C70, 0x11C8F}, {0x11C92, 0x11CA7},
+ {0x11CA9, 0x11CB6}, {0x11D00, 0x11D06}, {0x11D08, 0x11D09},
+ {0x11D0B, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D},
+ {0x11D3F, 0x11D47}, {0x11D50, 0x11D59}, {0x11D60, 0x11D65},
+ {0x11D67, 0x11D68}, {0x11D6A, 0x11D8E}, {0x11D90, 0x11D91},
+ {0x11D93, 0x11D98}, {0x11DA0, 0x11DA9}, {0x11EE0, 0x11EF8},
+ {0x11FB0, 0x11FB0}, {0x11FC0, 0x11FF1}, {0x11FFF, 0x12399},
+ {0x12400, 0x1246E}, {0x12470, 0x12474}, {0x12480, 0x12543},
+ {0x12F90, 0x12FF2}, {0x13000, 0x1342E}, {0x14400, 0x14646},
+ {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16A60, 0x16A69},
+ {0x16A6E, 0x16ABE}, {0x16AC0, 0x16AC9}, {0x16AD0, 0x16AED},
+ {0x16AF0, 0x16AF5}, {0x16B00, 0x16B45}, {0x16B50, 0x16B59},
+ {0x16B5B, 0x16B61}, {0x16B63, 0x16B77}, {0x16B7D, 0x16B8F},
+ {0x16E40, 0x16E9A}, {0x16F00, 0x16F4A}, {0x16F4F, 0x16F87},
+ {0x16F8F, 0x16F9F}, {0x16FE0, 0x16FE4}, {0x16FF0, 0x16FF1},
+ {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
+ {0x1AFF0, 0x1AFF3}, {0x1AFF5, 0x1AFFB}, {0x1AFFD, 0x1AFFE},
+ {0x1B000, 0x1B122}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167},
+ {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C},
+ {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1BC9C, 0x1BC9F},
+ {0x1CF00, 0x1CF2D}, {0x1CF30, 0x1CF46}, {0x1CF50, 0x1CFC3},
+ {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D172},
+ {0x1D17B, 0x1D1EA}, {0x1D200, 0x1D245}, {0x1D2E0, 0x1D2F3},
+ {0x1D300, 0x1D356}, {0x1D360, 0x1D378}, {0x1D400, 0x1D454},
+ {0x1D456, 0x1D49C}, {0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2},
+ {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9},
+ {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505},
+ {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514}, {0x1D516, 0x1D51C},
+ {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544},
+ {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5},
+ {0x1D6A8, 0x1D7CB}, {0x1D7CE, 0x1DA8B}, {0x1DA9B, 0x1DA9F},
+ {0x1DAA1, 0x1DAAF}, {0x1DF00, 0x1DF1E}, {0x1E000, 0x1E006},
+ {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024},
+ {0x1E026, 0x1E02A}, {0x1E100, 0x1E12C}, {0x1E130, 0x1E13D},
+ {0x1E140, 0x1E149}, {0x1E14E, 0x1E14F}, {0x1E290, 0x1E2AE},
+ {0x1E2C0, 0x1E2F9}, {0x1E2FF, 0x1E2FF}, {0x1E7E0, 0x1E7E6},
+ {0x1E7E8, 0x1E7EB}, {0x1E7ED, 0x1E7EE}, {0x1E7F0, 0x1E7FE},
+ {0x1E800, 0x1E8C4}, {0x1E8C7, 0x1E8D6}, {0x1E900, 0x1E94B},
+ {0x1E950, 0x1E959}, {0x1E95E, 0x1E95F}, {0x1EC71, 0x1ECB4},
+ {0x1ED01, 0x1ED3D}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F},
+ {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
+ {0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39},
+ {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47},
+ {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F},
+ {0x1EE51, 0x1EE52}, {0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57},
+ {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D},
+ {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64},
+ {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72}, {0x1EE74, 0x1EE77},
+ {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89},
+ {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9},
+ {0x1EEAB, 0x1EEBB}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B},
+ {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF},
+ {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F100, 0x1F1AD},
+ {0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248},
+ {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7},
+ {0x1F6DD, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773},
+ {0x1F780, 0x1F7D8}, {0x1F7E0, 0x1F7EB}, {0x1F7F0, 0x1F7F0},
+ {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859},
+ {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1},
+ {0x1F900, 0x1FA53}, {0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74},
+ {0x1FA78, 0x1FA7C}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAAC},
+ {0x1FAB0, 0x1FABA}, {0x1FAC0, 0x1FAC5}, {0x1FAD0, 0x1FAD9},
+ {0x1FAE0, 0x1FAE7}, {0x1FAF0, 0x1FAF6}, {0x1FB00, 0x1FB92},
+ {0x1FB94, 0x1FBCA}, {0x1FBF0, 0x1FBF9}, {0x20000, 0x2A6DF},
+ {0x2A700, 0x2B738}, {0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1},
+ {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
+ {0xE0100, 0xE01EF}};
+
+ static const UnicodeCharSet Printables(PrintableRanges);
+ // Clang special cases 0x00AD (SOFT HYPHEN) which is rendered as an actual
+ // hyphen in most terminals.
+ return UCS == 0x00AD || Printables.contains(UCS);
+}
+
+/// Unicode code points of the Cf category are considered
+/// fornatting characters.
+bool isFormatting(int UCS) {
+
+ // https://unicode.org/Public/14.0.0/ucdxml/
+ static const UnicodeCharRange Cf[] = {
+ {0x00AD, 0x00AD}, {0x0600, 0x0605}, {0x061C, 0x061C},
+ {0x06DD, 0x06DD}, {0x070F, 0x070F}, {0x0890, 0x0891},
+ {0x08E2, 0x08E2}, {0x180E, 0x180E}, {0x200B, 0x200F},
+ {0x202A, 0x202E}, {0x2060, 0x2064}, {0x2066, 0x206F},
+ {0xFEFF, 0xFEFF}, {0xFFF9, 0xFFFB}, {0x110BD, 0x110BD},
+ {0x110CD, 0x110CD}, {0x13430, 0x13438}, {0x1BCA0, 0x1BCA3},
+ {0x1D173, 0x1D17A}, {0xE0001, 0xE0001}, {0xE0020, 0xE007F}};
- return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS);
+ static const UnicodeCharSet Format(Cf);
+ return Format.contains(UCS);
}
/// Gets the number of positions a character is likely to occupy when output
More information about the cfe-commits
mailing list