[libcxx-commits] [PATCH] D149672: [libc++][format] Fixes UTF-8 continuation.
Mark de Wever via Phabricator via libcxx-commits
libcxx-commits at lists.llvm.org
Fri May 19 08:23:02 PDT 2023
Mordante updated this revision to Diff 523791.
Mordante added a comment.
Rebased.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D149672/new/
https://reviews.llvm.org/D149672
Files:
libcxx/include/__format/unicode.h
libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp
Index: libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp
===================================================================
--- libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp
+++ libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp
@@ -272,6 +272,31 @@
check(SV("*ZZZZ\xefZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xefZZZZ"));
check(SV("*ZZZZ\xffZZZZ*"), SV("{:*^11}"), SV("ZZZZ\xffZZZZ"));
+ // Invalid continuations
+ check(SV("\xc2\x00"), SV("{}"), SV("\xc2\x00")); // 0b0000'0000
+ check(SV("\xc2\x40"), SV("{}"), SV("\xc2\x40")); // 0b0100'0000
+ check(SV("\xc2\xc0"), SV("{}"), SV("\xc2\xc0")); // 0b1100'0000
+
+ check(SV("\xe0\x00\x80"), SV("{}"), SV("\xe0\x00\x80"));
+ check(SV("\xe0\x40\x80"), SV("{}"), SV("\xe0\x40\x80"));
+ check(SV("\xe0\xc0\x80"), SV("{}"), SV("\xe0\xc0\x80"));
+
+ check(SV("\xe0\x80\x00"), SV("{}"), SV("\xe0\x80\x00"));
+ check(SV("\xe0\x80\x40"), SV("{}"), SV("\xe0\x80\x40"));
+ check(SV("\xe0\x80\xc0"), SV("{}"), SV("\xe0\x80\xc0"));
+
+ check(SV("\xf0\x80\x80\x00"), SV("{}"), SV("\xf0\x80\x80\x00"));
+ check(SV("\xf0\x80\x80\x40"), SV("{}"), SV("\xf0\x80\x80\x40"));
+ check(SV("\xf0\x80\x80\xc0"), SV("{}"), SV("\xf0\x80\x80\xc0"));
+
+ check(SV("\xf0\x80\x00\x80"), SV("{}"), SV("\xf0\x80\x00\x80"));
+ check(SV("\xf0\x80\x40\x80"), SV("{}"), SV("\xf0\x80\x40\x80"));
+ check(SV("\xf0\x80\xc0\x80"), SV("{}"), SV("\xf0\x80\xc0\x80"));
+
+ check(SV("\xf0\x00\x80\x80"), SV("{}"), SV("\xf0\x00\x80\x80"));
+ check(SV("\xf0\x40\x80\x80"), SV("{}"), SV("\xf0\x40\x80\x80"));
+ check(SV("\xf0\xc0\x80\x80"), SV("{}"), SV("\xf0\xc0\x80\x80"));
+
// Premature end.
check(SV("*ZZZZ\xef\xf5*"), SV("{:*^8}"), SV("ZZZZ\xef\xf5"));
check(SV("*ZZZZ\xef\xf5ZZZZ*"), SV("{:*^12}"), SV("ZZZZ\xef\xf5ZZZZ"));
Index: libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
===================================================================
--- libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
+++ libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
@@ -504,6 +504,13 @@
"\xf7\xbf\xbf"
"a");
+ // http://unicode.org/review/pr-121.html
+ test_format(R"("a\x{f1}\x{80}\x{80}\x{e1}\x{80}\x{c2}b")"sv,
+ "{:?}",
+ "a"
+ "\xf1\x80\x80\xe1\x80\xc2"
+ "b");
+
// Code unit out of range
test_format(R"("\u{10ffff}")"sv, "{:?}", "\xf4\x8f\xbf\xbf"); // last valid code point
test_format(R"("\x{f4}\x{90}\x{80}\x{80}")"sv, "{:?}", "\xf4\x90\x80\x80"); // first invalid code point
Index: libcxx/include/__format/unicode.h
===================================================================
--- libcxx/include/__format/unicode.h
+++ libcxx/include/__format/unicode.h
@@ -105,7 +105,7 @@
requires same_as<iter_value_t<_Iterator>, char>
_LIBCPP_HIDE_FROM_ABI constexpr bool __is_continuation(_Iterator __char, int __count) {
do {
- if ((*__char & 0b1000'0000) != 0b1000'0000)
+ if ((*__char & 0b1100'0000) != 0b1000'0000)
return false;
--__count;
++__char;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D149672.523791.patch
Type: text/x-patch
Size: 3225 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/libcxx-commits/attachments/20230519/aa3f9352/attachment.bin>
More information about the libcxx-commits mailing list