[libcxx-commits] [libcxx] ac7031b - [libc++][format] Implement Unicode support.
Mark de Wever via libcxx-commits
libcxx-commits at lists.llvm.org
Sat Oct 2 02:57:46 PDT 2021
Author: Mark de Wever
Date: 2021-10-02T11:57:40+02:00
New Revision: ac7031b2b2fac247afe2d343940ec8e59b4b5fac
URL: https://github.com/llvm/llvm-project/commit/ac7031b2b2fac247afe2d343940ec8e59b4b5fac
DIFF: https://github.com/llvm/llvm-project/commit/ac7031b2b2fac247afe2d343940ec8e59b4b5fac.diff
LOG: [libc++][format] Implement Unicode support.
This adds the width estimation functions to the std-format-spec.
Implements parts of:
- P0645 Text Formatting
- P1868 width: clarifying units of width and precision in std::format
Reviewed By: #libc, ldionne, vitaut
Differential Revision: https://reviews.llvm.org/D103413
Added:
libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_non_unicode.pass.cpp
libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp
Modified:
libcxx/docs/Status/Cxx20Papers.csv
libcxx/include/__format/parser_std_format_spec.h
Removed:
################################################################################
diff --git a/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp b/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
new file mode 100644
index 0000000000000..156eaebd44f82
--- /dev/null
+++ b/libcxx/benchmarks/std_format_spec_string_unicode.bench.cpp
@@ -0,0 +1,196 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_HAS_NO_UNICODE
+
+#include <array>
+#include <format>
+
+#include "benchmark/benchmark.h"
+
+#include "test_macros.h"
+
+template <class CharT, size_t N>
+class tester {
+ static constexpr size_t size_ = N - 1;
+ std::array<CharT, 100 * size_> data_;
+
+public:
+ explicit constexpr tester(const CharT (&input)[N]) {
+ auto it = data_.begin();
+ for (int i = 0; i < 100; ++i)
+ it = std::copy_n(input, size_, it);
+ }
+
+ constexpr size_t size() const noexcept { return data_.size(); }
+ constexpr const CharT* begin() const noexcept { return data_.begin(); }
+ constexpr const CharT* end() const noexcept { return data_.end(); }
+
+ void test(benchmark::State& state) const {
+ for (auto _ : state)
+ benchmark::DoNotOptimize(std::__format_spec::__get_string_alignment(
+ begin(), end(), 1'000'000, 1'000'000));
+ state.SetItemsProcessed(state.iterations() * size());
+ }
+};
+
+#define TEST(u8) \
+ if constexpr (std::same_as<CharT, char>) { \
+ constexpr auto p = tester{u8}; \
+ p.test(state); \
+ } else if constexpr (std::same_as<CharT, char16_t>) { \
+ constexpr auto p = tester{TEST_CONCAT(u, u8)}; \
+ p.test(state); \
+ } else { \
+ constexpr auto p = tester{TEST_CONCAT(U, u8)}; \
+ p.test(state); \
+ }
+
+template <class CharT>
+static void BM_EstimateLengthNoMultiByte(benchmark::State& state) {
+ TEST("The quick brown fox jumps over the lazy dog");
+}
+
+template <class CharT>
+static void BM_EstimateLengthTwoByteDE(benchmark::State& state) {
+ static_assert(sizeof("Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich") == 67);
+
+ // https://en.wikipedia.org/wiki/Pangram
+ TEST("Victor jagt zwölf Boxkämpfer quer über den großen Sylter Deich");
+}
+
+template <class CharT>
+static void BM_EstimateLengthTwoBytePL(benchmark::State& state) {
+ static_assert(sizeof("Stróż pchnął kość w quiz gędźb vel fax myjń") == 53);
+
+ // https://en.wikipedia.org/wiki/Pangram
+ TEST("Stróż pchnął kość w quiz gędźb vel fax myjń");
+}
+
+// All values are below 0x1100, which is the first multi-column code point.
+template <class CharT>
+static void BM_EstimateLengthThreeByteSingleColumnLow(benchmark::State& state) {
+ static_assert(sizeof("\u0800\u0801\u0802\u0803\u0804\u0805\u0806\u0807"
+ "\u0808\u0809\u080a\u080b\u080c\u080d\u080e\u080f") ==
+ 49);
+
+ TEST("\u0800\u0801\u0802\u0803\u0804\u0805\u0806\u0807"
+ "\u0808\u0809\u080a\u080b\u080c\u080d\u080e\u080f");
+}
+
+template <class CharT>
+static void
+BM_EstimateLengthThreeByteSingleColumnHigh(benchmark::State& state) {
+ static_assert(sizeof("\u1800\u1801\u1802\u1803\u1804\u1805\u1806\u1807"
+ "\u1808\u1809\u180a\u180b\u180c\u180d\u180e\u180f") ==
+ 49);
+
+ TEST("\u1800\u1801\u1802\u1803\u1804\u1805\u1806\u1807"
+ "\u1808\u1809\u180a\u180b\u180c\u180d\u180e\u180f");
+}
+
+template <class CharT>
+static void BM_EstimateLengthThreeByteDoubleColumn(benchmark::State& state) {
+ static_assert(sizeof("\u1100\u0801\u0802\u0803\u0804\u0805\u0806\u0807"
+ "\u1108\u0809\u080a\u080b\u080c\u080d\u080e\u080f") ==
+ 49);
+
+ TEST("\u1100\u0801\u0802\u0803\u0804\u0805\u0806\u0807"
+ "\u1108\u0809\u080a\u080b\u080c\u080d\u080e\u080f");
+}
+
+template <class CharT>
+static void BM_EstimateLengthThreeByte(benchmark::State& state) {
+ static_assert(sizeof("\u1400\u1501\ubbbb\uff00\u0800\u4099\uabcd\u4000"
+ "\u8ead\ubeef\u1111\u4987\u4321\uffff\u357a\ud50e") ==
+ 49);
+
+ TEST("\u1400\u1501\ubbbb\uff00\u0800\u4099\uabcd\u4000"
+ "\u8ead\ubeef\u1111\u4987\u4321\uffff\u357a\ud50e");
+}
+
+template <class CharT>
+static void BM_EstimateLengthFourByteSingleColumn(benchmark::State& state) {
+ static_assert(sizeof("\U00010000\U00010001\U00010002\U00010003"
+ "\U00010004\U00010005\U00010006\U00010007"
+ "\U00010008\U00010009\U0001000a\U0001000b"
+ "\U0001000c\U0001000d\U0001000e\U0001000f") == 65);
+
+ TEST("\U00010000\U00010001\U00010002\U00010003"
+ "\U00010004\U00010005\U00010006\U00010007"
+ "\U00010008\U00010009\U0001000a\U0001000b"
+ "\U0001000c\U0001000d\U0001000e\U0001000f");
+}
+
+template <class CharT>
+static void BM_EstimateLengthFourByteDoubleColumn(benchmark::State& state) {
+ static_assert(sizeof("\U00020000\U00020002\U00020002\U00020003"
+ "\U00020004\U00020005\U00020006\U00020007"
+ "\U00020008\U00020009\U0002000a\U0002000b"
+ "\U0002000c\U0002000d\U0002000e\U0002000f") == 65);
+
+ TEST("\U00020000\U00020002\U00020002\U00020003"
+ "\U00020004\U00020005\U00020006\U00020007"
+ "\U00020008\U00020009\U0002000a\U0002000b"
+ "\U0002000c\U0002000d\U0002000e\U0002000f");
+}
+
+template <class CharT>
+static void BM_EstimateLengthFourByte(benchmark::State& state) {
+ static_assert(sizeof("\U00010000\U00010001\U00010002\U00010003"
+ "\U00020004\U00020005\U00020006\U00020007"
+ "\U00010008\U00010009\U0001000a\U0001000b"
+ "\U0002000c\U0002000d\U0002000e\U0002000f") == 65);
+
+ TEST("\U00010000\U00010001\U00010002\U00010003"
+ "\U00020004\U00020005\U00020006\U00020007"
+ "\U00010008\U00010009\U0001000a\U0001000b"
+ "\U0002000c\U0002000d\U0002000e\U0002000f");
+}
+
+BENCHMARK_TEMPLATE(BM_EstimateLengthNoMultiByte, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthTwoByteDE, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthTwoBytePL, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnLow, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnHigh, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteDoubleColumn, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByte, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteSingleColumn, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteDoubleColumn, char);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByte, char);
+
+BENCHMARK_TEMPLATE(BM_EstimateLengthNoMultiByte, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthTwoByteDE, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthTwoBytePL, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnLow, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnHigh, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteDoubleColumn, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByte, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteSingleColumn, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteDoubleColumn, char16_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByte, char16_t);
+
+BENCHMARK_TEMPLATE(BM_EstimateLengthNoMultiByte, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthTwoByteDE, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthTwoBytePL, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnLow, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteSingleColumnHigh, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByteDoubleColumn, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthThreeByte, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteSingleColumn, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByteDoubleColumn, char32_t);
+BENCHMARK_TEMPLATE(BM_EstimateLengthFourByte, char32_t);
+
+int main(int argc, char** argv) {
+ benchmark::Initialize(&argc, argv);
+ if (benchmark::ReportUnrecognizedArguments(argc, argv))
+ return 1;
+
+ benchmark::RunSpecifiedBenchmarks();
+}
+#else
+int main(int, char**) { return 0; }
+#endif
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index b5224ea9ef386..ee59d657b9f21 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -171,7 +171,7 @@
"`P1460 <https://wg21.link/P1460>`__","LWG","Mandating the Standard Library: Clause 20 - Utilities library","Prague","* *",""
"`P1739 <https://wg21.link/P1739>`__","LWG","Avoid template bloat for safe_ranges in combination with ""subrange-y"" view adaptors","Prague","* *",""
"`P1831 <https://wg21.link/P1831>`__","LWG","Deprecating volatile: library","Prague","* *",""
-"`P1868 <https://wg21.link/P1868>`__","LWG","width: clarifying units of width and precision in std::format","Prague","* *",""
+"`P1868 <https://wg21.link/P1868>`__","LWG","width: clarifying units of width and precision in std::format","Prague","|In Progress|",""
"`P1908 <https://wg21.link/P1908>`__","CWG","Reserving Attribute Namespaces for Future Use","Prague","* *",""
"`P1937 <https://wg21.link/P1937>`__","CWG","Fixing inconsistencies between constexpr and consteval functions","Prague","* *",""
"`P1956 <https://wg21.link/P1956>`__","LWG","On the names of low-level bit manipulation functions","Prague","|Complete|","12.0"
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index a383a1ff50b48..aab565e63e9e8 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -10,12 +10,15 @@
#ifndef _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
#define _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
+#include <__algorithm/find_if.h>
+#include <__algorithm/min.h>
#include <__config>
#include <__debug>
#include <__format/format_arg.h>
#include <__format/format_error.h>
#include <__format/format_string.h>
#include <__variant/monostate.h>
+#include <bit>
#include <concepts>
#include <cstdint>
#include <type_traits>
@@ -24,6 +27,9 @@
# pragma GCC system_header
#endif
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
_LIBCPP_BEGIN_NAMESPACE_STD
#if _LIBCPP_STD_VER > 17
@@ -711,6 +717,462 @@ class _LIBCPP_TEMPLATE_VIS __parser_integral
// TODO FMT Add a parser for floating-point values.
// TODO FMT Add a parser for pointer values.
+/** Helper struct returned from @ref __get_string_alignment. */
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS __string_alignment {
+ /** Points beyond the last character to write to the output. */
+ const _CharT* __last;
+ /**
+ * The estimated number of columns in the output or 0.
+ *
+ * Only when the output needs to be aligned it's required to know the exact
+ * number of columns in the output. So if the formatted output has only a
+ * minimum width the exact size isn't important. It's only important to know
+ * the minimum has been reached. The minimum width is the width specified in
+ * the format-spec.
+ *
+ * For example in this code @code std::format("{:10}", MyString); @endcode
+ * the width estimation can stop once the algorithm has determined the output
+ * width is 10 columns.
+ *
+ * So if:
+ * * @ref __align == @c true the @ref __size is the estimated number of
+ * columns required.
+ * * @ref __align == @c false the @ref __size is the estimated number of
+ * columns required or 0 when the estimation algorithm stopped prematurely.
+ */
+ ptrdiff_t __size;
+ /**
+ * Does the output need to be aligned.
+ *
+ * When alignment is needed the output algorithm needs to add the proper
+ * padding. Else the output algorithm just needs to copy the input up to
+ * @ref __last.
+ */
+ bool __align;
+};
+
+#ifndef _LIBCPP_HAS_NO_UNICODE
+namespace __detail {
+
+/**
+ * Unicode column width estimates.
+ *
+ * Unicode can be stored in several formats: UTF-8, UTF-16, and UTF-32.
+ * Depending on format the relation between the number of code units stored and
+ * the number of output columns differs. The first relation is the number of
+ * code units forming a code point. (The text assumes the code units are
+ * unsigned.)
+ * - UTF-8: The number of code units is between one and four. The first 128
+ * Unicode code points match the ASCII character set. When the highest bit is
+ * set it means the code point has more than one code unit.
+ * - UTF-16: The number of code units is between 1 and 2. When the first
+ * code unit is in the range [0xd800,0xdfff] it means the code point uses two
+ * code units.
+ * - UTF-32: The number of code units is always one.
+ *
+ * The mapping from a code point to its column width isn't well defined. The code uses the
+ * estimations defined in [format.string.std]/11. This list might change in the
+ * future.
+ *
+ * The algorithm of @ref __get_string_alignment uses two different scanners:
+ * - The simple scanner @ref __estimate_column_width_fast. This scanner assumes
+ * 1 code unit is 1 column. This scanner stops when it can't be sure the
+ * assumption is valid:
+ * - UTF-8 when the code point is encoded in more than 1 code unit.
+ * - UTF-16 and UTF-32 when the first multi-column code point is encountered.
+ * (The code unit's value is lower than 0xd800 so the 2 code unit encoding
+ * is irrelevant for this scanner.)
+ * Due to these assumptions the scanner is faster than the full scanner. It
+ * can process all text only containing ASCII. For UTF-16/32 it can process
+ * most (all?) European languages. (Note the set it can process might be
+ * reduced in the future, due to updates in the scanning rules.)
+ * - The full scanner @ref __estimate_column_width. This scanner, if needed,
+ * converts multiple code units into one code point then converts the code
+ * point to a column width.
+ *
+ * See also:
+ * - [format.string.std]/11
+ * - https://en.wikipedia.org/wiki/UTF-8#Encoding
+ * - https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
+ */
+
+/**
+ * The first 2 column code point.
+ *
+ * This is the point where the fast UTF-16/32 scanner needs to stop processing.
+ */
+inline constexpr uint32_t __two_column_code_point = 0x1100;
+
+/** Helper concept for an UTF-8 character type. */
+template <class _CharT>
+concept __utf8_character = same_as<_CharT, char> || same_as<_CharT, char8_t>;
+
+/** Helper concept for an UTF-16 character type. */
+template <class _CharT>
+concept __utf16_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) || same_as<_CharT, char16_t>;
+
+/** Helper concept for an UTF-32 character type. */
+template <class _CharT>
+concept __utf32_character = (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) || same_as<_CharT, char32_t>;
+
+/** Helper concept for an UTF-16 or UTF-32 character type. */
+template <class _CharT>
+concept __utf16_or_32_character = __utf16_character<_CharT> || __utf32_character<_CharT>;
+
+/**
+ * Converts a code point to the column width.
+ *
+ * The estimations are conforming to [format.string.std]/11
+ *
+ * This version expects a value less than 0x1'0000, which is a 3-byte UTF-8
+ * character.
+ */
+_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_3(uint32_t __c) noexcept {
+ _LIBCPP_ASSERT(__c < 0x1'0000,
+ "Use __column_width_4 or __column_width for larger values");
+
+ // clang-format off
+ return 1 + (__c >= 0x1100 && (__c <= 0x115f ||
+ (__c >= 0x2329 && (__c <= 0x232a ||
+ (__c >= 0x2e80 && (__c <= 0x303e ||
+ (__c >= 0x3040 && (__c <= 0xa4cf ||
+ (__c >= 0xac00 && (__c <= 0xd7a3 ||
+ (__c >= 0xf900 && (__c <= 0xfaff ||
+ (__c >= 0xfe10 && (__c <= 0xfe19 ||
+ (__c >= 0xfe30 && (__c <= 0xfe6f ||
+ (__c >= 0xff00 && (__c <= 0xff60 ||
+ (__c >= 0xffe0 && (__c <= 0xffe6
+ ))))))))))))))))))));
+ // clang-format on
+}
+
+/**
+ * @overload
+ *
+ * This version expects a value greater than or equal to 0x1'0000, which is a
+ * 4-byte UTF-8 character.
+ */
+_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width_4(uint32_t __c) noexcept {
+ _LIBCPP_ASSERT(__c >= 0x1'0000,
+ "Use __column_width_3 or __column_width for smaller values");
+
+ // clang-format off
+ return 1 + (__c >= 0x1'f300 && (__c <= 0x1'f64f ||
+ (__c >= 0x1'f900 && (__c <= 0x1'f9ff ||
+ (__c >= 0x2'0000 && (__c <= 0x2'fffd ||
+ (__c >= 0x3'0000 && (__c <= 0x3'fffd
+ ))))))));
+ // clang-format on
+}
+
+/**
+ * @overload
+ *
+ * The general case, accepting all values.
+ */
+_LIBCPP_HIDE_FROM_ABI inline constexpr int __column_width(uint32_t __c) noexcept {
+ if (__c < 0x1'0000)
+ return __column_width_3(__c);
+
+ return __column_width_4(__c);
+}
+
+/**
+ * Estimate the column width for the UTF-8 sequence using the fast algorithm.
+ */
+template <__utf8_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__estimate_column_width_fast(const _CharT* __first,
+ const _CharT* __last) noexcept {
+ return _VSTD::find_if(__first, __last,
+ [](unsigned char __c) { return __c & 0x80; });
+}
+
+/**
+ * @overload
+ *
+ * The implementation for UTF-16/32.
+ */
+template <__utf16_or_32_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr const _CharT*
+__estimate_column_width_fast(const _CharT* __first,
+ const _CharT* __last) noexcept {
+ return _VSTD::find_if(__first, __last,
+ [](uint32_t __c) { return __c >= 0x1100; });
+}
+
+template <class _CharT>
+struct _LIBCPP_TEMPLATE_VIS __column_width_result {
+ /** The number of output columns. */
+ size_t __width;
+ /**
+ * The last parsed element.
+ *
+ * This limits the original output to fit in the wanted number of columns.
+ */
+ const _CharT* __ptr;
+};
+
+/**
+ * Small helper to determine the width of malformed Unicode.
+ *
+ * @note This function's only needed for UTF-8. During scanning UTF-8 there
+ * are multiple places where it can be detected that the Unicode is malformed.
+ * UTF-16 only requires 1 test and UTF-32 requires no testing.
+ */
+template <__utf8_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width_malformed(const _CharT* __first, const _CharT* __last,
+ size_t __maximum, size_t __result) noexcept {
+ size_t __size = __last - __first;
+ size_t __n = _VSTD::min(__size, __maximum);
+ return {__result + __n, __first + __n};
+}
+
+/**
+ * Determines the number of output columns needed to render the input.
+ *
+ * @note When the scanner encounters malformed Unicode it acts as-if every code
+ * unit at the end of the input is one output column. It's expected the output
+ * terminal will replace each of these malformed code units with a one-column
+ * replacement character.
+ *
+ * @param __first Points to the first element of the input range.
+ * @param __last Points beyond the last element of the input range.
+ * @param __maximum The maximum number of output columns. The returned number
+ * of estimated output columns will not exceed this value.
+ */
+template <__utf8_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width(const _CharT* __first, const _CharT* __last,
+ size_t __maximum) noexcept {
+ size_t __result = 0;
+
+ while (__first != __last) {
+ // Based on the number of leading 1 bits the number of code units in the
+ // code point can be determined. See
+ // https://en.wikipedia.org/wiki/UTF-8#Encoding
+ switch (_VSTD::countl_one(static_cast<unsigned char>(*__first))) {
+ case 0: // 1-code unit encoding: all 1 column
+ ++__result;
+ ++__first;
+ break;
+
+ case 2: // 2-code unit encoding: all 1 column
+ // Malformed Unicode.
+ if (__last - __first < 2) [[unlikely]]
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ __first += 2;
+ ++__result;
+ break;
+
+ case 3: // 3-code unit encoding: either 1 or 2 columns
+ // Malformed Unicode.
+ if (__last - __first < 3) [[unlikely]]
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ {
+ uint32_t __c = static_cast<unsigned char>(*__first++) & 0x0f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __result += __column_width_3(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first - 3};
+ }
+ break;
+ case 4: // 4-code unit encoding: either 1 or 2 columns
+ // Malformed Unicode.
+ if (__last - __first < 4) [[unlikely]]
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ {
+ uint32_t __c = static_cast<unsigned char>(*__first++) & 0x07;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __c <<= 6;
+ __c |= static_cast<unsigned char>(*__first++) & 0x3f;
+ __result += __column_width_4(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first - 4};
+ }
+ break;
+ default:
+ // Malformed Unicode.
+ return __estimate_column_width_malformed(__first, __last, __maximum,
+ __result);
+ }
+
+ if (__result >= __maximum)
+ return {__result, __first};
+ }
+ return {__result, __first};
+}
+
+template <__utf16_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width(const _CharT* __first, const _CharT* __last,
+ size_t __maximum) noexcept {
+ size_t __result = 0;
+
+ while (__first != __last) {
+ uint32_t __c = *__first;
+ // Is the code unit part of a surrogate pair? See
+ // https://en.wikipedia.org/wiki/UTF-16#U+D800_to_U+DFFF
+ if (__c >= 0xd800 && __c <= 0xDfff) {
+ // Malformed Unicode.
+ if (__last - __first < 2) [[unlikely]]
+ return {__result + 1, __first + 1};
+
+ __c -= 0xd800;
+ __c <<= 10;
+ __c += (*(__first + 1) - 0xdc00);
+ __c += 0x10'000;
+
+ __result += __column_width_4(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first};
+ __first += 2;
+ } else {
+ __result += __column_width_3(__c);
+ if (__result > __maximum)
+ return {__result - 2, __first};
+ ++__first;
+ }
+
+ if (__result >= __maximum)
+ return {__result, __first};
+ }
+
+ return {__result, __first};
+}
+
+template <__utf32_character _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_CharT>
+__estimate_column_width(const _CharT* __first, const _CharT* __last,
+ size_t __maximum) noexcept {
+ size_t __result = 0;
+
+ while (__first != __last) {
+ wchar_t __c = *__first;
+ __result += __column_width(__c);
+
+ if (__result > __maximum)
+ return {__result - 2, __first};
+
+ ++__first;
+ if (__result >= __maximum)
+ return {__result, __first};
+ }
+
+ return {__result, __first};
+}
+
+} // namespace __detail
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
+__get_string_alignment(const _CharT* __first, const _CharT* __last,
+ ptrdiff_t __width, ptrdiff_t __precision) noexcept {
+ _LIBCPP_ASSERT(__width != 0 || __precision != -1,
+ "The function has no effect and shouldn't be used");
+
+ // TODO FMT There might be more optimizations possible:
+ // If __precision == __format::__number_max and the encoding is:
+ // * UTF-8 : 4 * (__last - __first) >= __width
+ // * UTF-16 : 2 * (__last - __first) >= __width
+ // * UTF-32 : (__last - __first) >= __width
+ // In these cases it's certain the output is at least the requested width.
+ // It's unknown how often this happens in practice. For now the improvement
+ // isn't implemented.
+
+ /*
+ * First assume there are no special Unicode code units in the input.
+ * - Apply the precision (this may reduce the size of the input). When
+ * __precision == -1 this step is omitted.
+ * - Scan for special code units in the input.
+ * If our assumption was correct the __pos will be at the end of the input.
+ */
+ const ptrdiff_t __length = __last - __first;
+ const _CharT* __limit =
+ __first +
+ (__precision == -1 ? __length : _VSTD::min(__length, __precision));
+ ptrdiff_t __size = __limit - __first;
+ const _CharT* __pos =
+ __detail::__estimate_column_width_fast(__first, __limit);
+
+ if (__pos == __limit)
+ return {__limit, __size, __size < __width};
+
+ /*
+ * Our assumption was wrong, there are special Unicode code units.
+ * The range [__first, __pos) contains a set of code units with the
+ * following property:
+ * Every _CharT in the range will be rendered in 1 column.
+ *
+ * If there's no maximum width and the parsed size already meets or exceeds
+ * the minimum required width, the real size isn't important. So bail out.
+ */
+ if (__precision == -1 && (__pos - __first) >= __width)
+ return {__last, 0, false};
+
+ /* If there's a __precision, truncate the output to that width. */
+ ptrdiff_t __prefix = __pos - __first;
+ if (__precision != -1) {
+ _LIBCPP_ASSERT(__precision > __prefix, "Logic error.");
+ auto __lengh_info = __detail::__estimate_column_width(
+ __pos, __last, __precision - __prefix);
+ __size = __lengh_info.__width + __prefix;
+ return {__lengh_info.__ptr, __size, __size < __width};
+ }
+
+ /* Else use __width to determine the number of required padding characters. */
+ _LIBCPP_ASSERT(__width > __prefix, "Logic error.");
+ /*
+ * The column width is always one or two columns. For the precision the wanted
+ * column width is the maximum, for the width it's the minimum. Using the
+ * width estimation with its truncating behavior will result in the wrong
+ * result in the following case:
+ * - The last code unit processed requires two columns and exceeds the
+ * maximum column width.
+ * Increasing the __maximum by one avoids this issue. (It means it may
+ * pass one code point more than required to determine the proper result;
+ * that however isn't a problem for the algorithm.)
+ */
+ size_t __maximum = 1 + __width - __prefix;
+ auto __lengh_info =
+ __detail::__estimate_column_width(__pos, __last, __maximum);
+ if (__lengh_info.__ptr != __last) {
+ // Consumed the width number of code units. The exact size of the string
+ // is unknown. We only know we don't need to align the output.
+ _LIBCPP_ASSERT(static_cast<ptrdiff_t>(__lengh_info.__width + __prefix) >=
+ __width,
+ "Logic error");
+ return {__last, 0, false};
+ }
+
+ __size = __lengh_info.__width + __prefix;
+ return {__last, __size, __size < __width};
+}
+#else // _LIBCPP_HAS_NO_UNICODE
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI constexpr __string_alignment<_CharT>
+__get_string_alignment(const _CharT* __first, const _CharT* __last,
+ ptrdiff_t __width, ptrdiff_t __precision) noexcept {
+ const ptrdiff_t __length = __last - __first;
+ const _CharT* __limit =
+ __first +
+ (__precision == -1 ? __length : _VSTD::min(__length, __precision));
+ ptrdiff_t __size = __limit - __first;
+ return {__limit, __size, __size < __width};
+}
+#endif // _LIBCPP_HAS_NO_UNICODE
+
} // namespace __format_spec
# endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
@@ -719,4 +1181,6 @@ class _LIBCPP_TEMPLATE_VIS __parser_integral
_LIBCPP_END_NAMESPACE_STD
+_LIBCPP_POP_MACROS
+
#endif // _LIBCPP___FORMAT_PARSER_STD_FORMAT_SPEC_H
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_non_unicode.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_non_unicode.pass.cpp
new file mode 100644
index 0000000000000..51c0826328d21
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_non_unicode.pass.cpp
@@ -0,0 +1,110 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// UTF-32 doesn't work properly
+// XFAIL: windows
+
+// <format>
+
+// Tests the Unicode width support of the standard format specifiers.
+// It tests [format.string.std]/8 - 11:
+// - Properly determining the estimated width of a Unicode string.
+// - Properly truncating to the wanted maximum width.
+
+// This version runs the test when the platform doesn't have Unicode support.
+// REQUIRES: libcpp-has-no-unicode
+
+#include <format>
+#include <cassert>
+
+#include "test_macros.h"
+#include "make_string.h"
+
+#define CSTR(S) MAKE_CSTRING(CharT, S)
+
+using namespace std::__format_spec;
+
+template <class CharT>
+constexpr bool operator==(const __string_alignment<CharT>& lhs,
+ const __string_alignment<CharT>& rhs) noexcept {
+ return lhs.__last == rhs.__last && lhs.__size == rhs.__size &&
+ lhs.__align == rhs.__align;
+}
+
+template <class CharT>
+constexpr void get_string_alignment(size_t offset, ptrdiff_t size, bool align,
+ const CharT* str, size_t width,
+ size_t precision) {
+ std::basic_string_view<CharT> sv{str};
+ __string_alignment<CharT> expected{sv.begin() + offset, size, align};
+ __string_alignment<CharT> traits =
+ __get_string_alignment(sv.begin(), sv.end(), width, precision);
+ assert(traits == expected);
+}
+
+template <class CharT>
+constexpr void get_string_alignment() {
+ // Truncate the input.
+ get_string_alignment(2, 2, false, CSTR("abc"), 0, 2);
+
+ // The 2-column character gets half accepted.
+ get_string_alignment(2, 2, false, CSTR("a\u115f"), 0, 2);
+
+ // No alignment since the number of characters fits.
+ get_string_alignment(2, 2, false, CSTR("a\u115f"), 2, 2);
+
+ // Same but for a 2-column 4-byte UTF-8 sequence
+ get_string_alignment(2, 2, false, CSTR("a\U0001f300"), 0, 2);
+ get_string_alignment(2, 2, false, CSTR("a\U0001f300"), 2, 2);
+
+ // No alignment required.
+ get_string_alignment(3, 3, false, CSTR("abc"), 2, -1);
+ get_string_alignment(3, 3, false, CSTR("abc"), 3, -1);
+
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1),
+ 3 + 2 * (sizeof(CharT) == 1), false, CSTR("ab\u1111"), 2,
+ -1);
+
+ // Doesn't evaluate 'c' so size -> 0
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1),
+ 3 + 2 * (sizeof(CharT) == 1), false,
+ CSTR("a\u115fc") /* 2-column character */, 3, -1);
+ // Extend width
+ get_string_alignment(3, 3, true, CSTR("abc"), 4, -1);
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1),
+ 3 + 2 * (sizeof(CharT) == 1), true,
+ CSTR("a\u1160c") /* 1-column character */, 6, -1);
+}
+
+template <class CharT>
+constexpr void test() {
+ get_string_alignment<CharT>();
+}
+
+constexpr bool test() {
+ test<char>();
+ test<wchar_t>();
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
+ test<char8_t>();
+#endif
+#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
+ test<char16_t>();
+ test<char32_t>();
+#endif
+ return true;
+}
+
+int main(int, char**) {
+ test();
+ static_assert(test());
+
+ return 0;
+}
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp
new file mode 100644
index 0000000000000..8109331838a9e
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_string_unicode.pass.cpp
@@ -0,0 +1,270 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// UTF-32 doesn't work properly
+// XFAIL: windows
+
+// <format>
+
+// Tests the Unicode width support of the standard format specifiers.
+// It tests [format.string.std]/8 - 11:
+// - Properly determining the estimated width of a Unicode string.
+// - Properly truncating to the wanted maximum width.
+
+// This version runs the test when the platform has Unicode support.
+// UNSUPPORTED: libcpp-has-no-unicode
+
+#include <format>
+#include <cassert>
+
+#include "test_macros.h"
+#include "make_string.h"
+
+#define CSTR(S) MAKE_CSTRING(CharT, S)
+
+using namespace std::__format_spec;
+
+template <class CharT>
+constexpr bool operator==(const __string_alignment<CharT>& lhs,
+ const __string_alignment<CharT>& rhs) noexcept {
+ return lhs.__last == rhs.__last && lhs.__size == rhs.__size &&
+ lhs.__align == rhs.__align;
+}
+
+template <class CharT>
+constexpr void get_string_alignment(size_t offset, ptrdiff_t size, bool align,
+ const CharT* str, size_t width,
+ size_t precision) {
+ std::basic_string_view<CharT> sv{str};
+ __string_alignment<CharT> expected{sv.begin() + offset, size, align};
+ __string_alignment<CharT> traits =
+ __get_string_alignment(sv.begin(), sv.end(), width, precision);
+ assert(traits == expected);
+}
+
+template <class CharT>
+constexpr void estimate_column_width_fast(size_t expected, const CharT* str) {
+ std::basic_string_view<CharT> sv{str};
+ const CharT* out =
+ __detail::__estimate_column_width_fast(sv.begin(), sv.end());
+ assert(out == sv.begin() + expected);
+}
+
+template <class CharT>
+constexpr void estimate_column_width_fast() {
+
+ // No unicode
+ estimate_column_width_fast(3, CSTR("abc"));
+ estimate_column_width_fast(3, CSTR("a\u007fc"));
+
+ if constexpr (sizeof(CharT) == 1) {
+ // UTF-8 stop at the first multi-byte character.
+ estimate_column_width_fast(0, CSTR("\u0080bc"));
+ estimate_column_width_fast(1, CSTR("a\u0080c"));
+ estimate_column_width_fast(2, CSTR("ab\u0080"));
+ estimate_column_width_fast(1, CSTR("aßc"));
+
+ estimate_column_width_fast(1, CSTR("a\u07ffc"));
+ estimate_column_width_fast(1, CSTR("a\u0800c"));
+
+ estimate_column_width_fast(1, CSTR("a\u10ffc"));
+ } else {
+ // UTF-16/32 stop at the first multi-column character.
+ estimate_column_width_fast(3, CSTR("\u0080bc"));
+ estimate_column_width_fast(3, CSTR("a\u0080c"));
+ estimate_column_width_fast(3, CSTR("ab\u0080"));
+ estimate_column_width_fast(3, CSTR("aßc"));
+
+ estimate_column_width_fast(3, CSTR("a\u07ffc"));
+ estimate_column_width_fast(3, CSTR("a\u0800c"));
+
+ estimate_column_width_fast(3, CSTR("a\u10ffc"));
+ }
+ // First 2-column character
+ estimate_column_width_fast(1, CSTR("a\u1100c"));
+
+ estimate_column_width_fast(1, CSTR("a\U0000ffffc"));
+ estimate_column_width_fast(1, CSTR("a\U00010000c"));
+ estimate_column_width_fast(1, CSTR("a\U0010FFFFc"));
+}
+
+template <class CharT>
+constexpr void estimate_column_width(size_t expected, const CharT* str) {
+ std::basic_string_view<CharT> sv{str};
+ std::__format_spec::__detail::__column_width_result<CharT> column_info =
+ __detail::__estimate_column_width(sv.begin(), sv.end(), -1);
+ assert(column_info.__width == expected);
+}
+
+template <class CharT>
+constexpr void estimate_column_width() {
+ //*** 1-byte code points ***
+ estimate_column_width(1, CSTR(" "));
+ estimate_column_width(1, CSTR("~"));
+
+ //*** 2-byte code points ***
+ estimate_column_width(1, CSTR("\u00a1")); // INVERTED EXCLAMATION MARK
+ estimate_column_width(1, CSTR("\u07ff")); // NKO TAMAN SIGN
+
+ //*** 3-byte code points ***
+ estimate_column_width(1, CSTR("\u0800")); // SAMARITAN LETTER ALAF
+ estimate_column_width(1, CSTR("\ufffd")); // REPLACEMENT CHARACTER
+
+ // 2 column ranges
+ estimate_column_width(2, CSTR("\u1100")); // HANGUL CHOSEONG KIYEOK
+ estimate_column_width(2, CSTR("\u115f")); // HANGUL CHOSEONG FILLER
+
+ estimate_column_width(2, CSTR("\u2329")); // LEFT-POINTING ANGLE BRACKET
+ estimate_column_width(2, CSTR("\u232a")); // RIGHT-POINTING ANGLE BRACKET
+
+ estimate_column_width(2, CSTR("\u2e80")); // CJK RADICAL REPEAT
+ estimate_column_width(2, CSTR("\u303e")); // IDEOGRAPHIC VARIATION INDICATOR
+
+ estimate_column_width(2, CSTR("\u3040")); // U+3041 HIRAGANA LETTER SMALL A
+ estimate_column_width(2, CSTR("\ua4cf")); // U+A4D0 LISU LETTER BA
+
+ estimate_column_width(2, CSTR("\uac00")); // <Hangul Syllable, First>
+ estimate_column_width(2, CSTR("\ud7a3")); // Hangul Syllable Hih
+
+ estimate_column_width(2, CSTR("\uf900")); // CJK COMPATIBILITY IDEOGRAPH-F900
+ estimate_column_width(2, CSTR("\ufaff")); // U+FB00 LATIN SMALL LIGATURE FF
+
+ estimate_column_width(2,
+ CSTR("\ufe10")); // PRESENTATION FORM FOR VERTICAL COMMA
+ estimate_column_width(
+ 2, CSTR("\ufe19")); // PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
+
+ estimate_column_width(
+ 2, CSTR("\ufe30")); // PRESENTATION FORM FOR VERTICAL TWO DOT LEADER
+ estimate_column_width(2,
+ CSTR("\ufe6f")); // U+FE70 ARABIC FATHATAN ISOLATED FORM
+
+ estimate_column_width(2, CSTR("\uff00")); // U+FF01 FULLWIDTH EXCLAMATION MARK
+ estimate_column_width(2, CSTR("\uff60")); // FULLWIDTH RIGHT WHITE PARENTHESIS
+
+ estimate_column_width(2, CSTR("\uffe0")); // FULLWIDTH CENT SIGN
+ estimate_column_width(2, CSTR("\uffe6")); // FULLWIDTH WON SIGN
+
+ //*** 4-byte code points ***
+ estimate_column_width(1, CSTR("\U00010000")); // LINEAR B SYLLABLE B008 A
+ estimate_column_width(1, CSTR("\U0010FFFF")); // Undefined Character
+
+ // 2 column ranges
+ estimate_column_width(2, CSTR("\U0001f300")); // CYCLONE
+ estimate_column_width(2, CSTR("\U0001f64f")); // PERSON WITH FOLDED HANDS
+ estimate_column_width(
+ 2, CSTR("\U0001f900")); // CIRCLED CROSS FORMEE WITH FOUR DOTS
+ estimate_column_width(2, CSTR("\U0001f9ff")); // NAZAR AMULET
+ estimate_column_width(
+ 2, CSTR("\U00020000")); // <CJK Ideograph Extension B, First>
+ estimate_column_width(2, CSTR("\U0002fffd")); // Undefined Character
+ estimate_column_width(
+ 2, CSTR("\U00030000")); // <CJK Ideograph Extension G, First>
+ estimate_column_width(2, CSTR("\U0003fffd")); // Undefined Character
+}
+
+template <class CharT>
+constexpr void get_string_alignment() {
+ // Truncate the input.
+ get_string_alignment(2, 2, false, CSTR("abc"), 0, 2);
+
+ // The 2-column character gets entirely rejected.
+ get_string_alignment(1, 1, false, CSTR("a\u115f"), 0, 2);
+
+ // Due to the requested width extra alignment is required.
+ get_string_alignment(1, 1, true, CSTR("a\u115f"), 2, 2);
+
+ // Same but for a 2-column 4-byte UTF-8 sequence
+ get_string_alignment(1, 1, false, CSTR("a\U0001f300"), 0, 2);
+ get_string_alignment(1, 1, true, CSTR("a\U0001f300"), 2, 2);
+
+ // No alignment required.
+ get_string_alignment(3, 3, false, CSTR("abc"), 2, -1);
+ get_string_alignment(3, 3, false, CSTR("abc"), 3, -1);
+
+ // Special case, we have a special character already parsed and have enough
+ // width to satisfy the minimum required width.
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1), 0, false, CSTR("ab\u1111"),
+ 2, -1);
+
+ // Evaluates all so size ->4
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1), 4, false,
+ CSTR("a\u115fc") /* 2-column character */, 3, -1);
+ // Evaluates all so size ->4
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1), 4, false,
+ CSTR("a\u115fc") /* 2-column character */, 4, -1);
+
+ // Evaluates all so size ->5
+ get_string_alignment(4 + 2 * (sizeof(CharT) == 1), 5, false,
+ CSTR("a\u115fcd") /* 2-column character */, 4, -1);
+
+ // Evaluates all so size ->5
+ get_string_alignment(4 + 2 * (sizeof(CharT) == 1), 5, false,
+ CSTR("a\u115fcd") /* 2-column character */, 5, -1);
+
+ // Extend width
+ get_string_alignment(3, 3, true, CSTR("abc"), 4, -1);
+ get_string_alignment(3 + 2 * (sizeof(CharT) == 1), 3, true,
+ CSTR("a\u1160c") /* 1-column character */, 4, -1);
+
+ // In this case the threshold where the width is still determined.
+ get_string_alignment(2 + 2 * (sizeof(CharT) == 1), 3, false, CSTR("i\u1110"),
+ 2, -1);
+
+ // The width is no longer exactly determined.
+ get_string_alignment(2 + 2 * (sizeof(CharT) == 1), 0, false, CSTR("i\u1110"),
+ 1, -1);
+
+ // Extend width and truncate input.
+ get_string_alignment(1, 1, true, CSTR("abc"), 3, 1);
+
+ if constexpr (sizeof(CharT) == 1) {
+ // Corrupt UTF-8 sequence.
+ get_string_alignment(2, 2, false, CSTR("a\xc0"), 0, 3);
+ get_string_alignment(2, 2, false, CSTR("a\xe0"), 0, 3);
+ get_string_alignment(2, 2, false, CSTR("a\xf0"), 0, 3);
+ } else if constexpr (sizeof(CharT) == 2) {
+ // Corrupt UTF-16 sequence.
+ if constexpr (std::same_as<CharT, char16_t>)
+ get_string_alignment(2, 2, false, u"a\xdddd", 0, 3);
+ else
+ // Corrupt UTF-16 wchar_t sequence.
+ get_string_alignment(2, 2, false, L"a\xdddd", 0, 3);
+ }
+ // UTF-32 doesn't combine characters, thus no corruption tests.
+}
+
+template <class CharT>
+constexpr void test() {
+ estimate_column_width_fast<CharT>();
+ estimate_column_width<CharT>();
+ get_string_alignment<CharT>();
+}
+
+constexpr bool test() {
+ test<char>();
+ test<wchar_t>();
+#ifndef _LIBCPP_HAS_NO_CHAR8_T
+ test<char8_t>();
+#endif
+#ifndef _LIBCPP_HAS_NO_UNICODE_CHARS
+ test<char16_t>();
+ test<char32_t>();
+#endif
+ return true;
+}
+
+int main(int, char**) {
+ test();
+ static_assert(test());
+
+ return 0;
+}
More information about the libcxx-commits
mailing list