[libcxx-commits] [libcxx] 3ab20c6 - [libc++][format] Granularize formatter_output.
Mark de Wever via libcxx-commits
libcxx-commits at lists.llvm.org
Mon Jul 10 10:30:31 PDT 2023
Author: Mark de Wever
Date: 2023-07-10T19:30:26+02:00
New Revision: 3ab20c6809d4b5d35d3538422b60ea0abd527a47
URL: https://github.com/llvm/llvm-project/commit/3ab20c6809d4b5d35d3538422b60ea0abd527a47
DIFF: https://github.com/llvm/llvm-project/commit/3ab20c6809d4b5d35d3538422b60ea0abd527a47.diff
LOG: [libc++][format] Granularize formatter_output.
This should reduce the size of the transitive includes for the vector header.
Note the header is still quite large, so the difference may be small.
Depends on D154122
Reviewed By: ldionne, #libc
Differential Revision: https://reviews.llvm.org/D154286
Added:
libcxx/include/__format/write_escaped.h
Modified:
libcxx/include/CMakeLists.txt
libcxx/include/__chrono/formatter.h
libcxx/include/__format/formatter_char.h
libcxx/include/__format/formatter_floating_point.h
libcxx/include/__format/formatter_integral.h
libcxx/include/__format/formatter_output.h
libcxx/include/__format/formatter_string.h
libcxx/include/module.modulemap.in
libcxx/utils/data/ignore_format.txt
Removed:
################################################################################
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 02536914b9bdfd..0a14e89ee13baf 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -384,6 +384,7 @@ set(files
__format/range_formatter.h
__format/unicode.h
__format/width_estimation_table.h
+ __format/write_escaped.h
__functional/binary_function.h
__functional/binary_negate.h
__functional/bind.h
diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h
index 8274b0c05d4b34..2de6e1420d1272 100644
--- a/libcxx/include/__chrono/formatter.h
+++ b/libcxx/include/__chrono/formatter.h
@@ -38,8 +38,8 @@
#include <__format/format_functions.h>
#include <__format/format_parse_context.h>
#include <__format/formatter.h>
-#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
+#include <__format/write_escaped.h>
#include <__memory/addressof.h>
#include <cmath>
#include <ctime>
diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h
index 15a649807b73f3..8b1b357e160342 100644
--- a/libcxx/include/__format/formatter_char.h
+++ b/libcxx/include/__format/formatter_char.h
@@ -19,6 +19,7 @@
#include <__format/formatter_integral.h>
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
+#include <__format/write_escaped.h>
#include <__type_traits/conditional.h>
#include <__type_traits/is_signed.h>
diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
index 4fd01f70aea133..e01d7b8d7bd0c8 100644
--- a/libcxx/include/__format/formatter_floating_point.h
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -28,6 +28,7 @@
#include <__format/formatter_integral.h>
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
+#include <__iterator/concepts.h>
#include <__memory/allocator.h>
#include <__system_error/errc.h>
#include <__type_traits/conditional.h>
@@ -608,6 +609,37 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __format_floating_point_non_finite(
return __formatter::__write(__buffer, __last, _VSTD::move(__out_it), __specs);
}
+/// Writes additional zero's for the precision before the exponent.
+/// This is used when the precision requested in the format string is larger
+/// than the maximum precision of the floating-point type. These precision
+/// digits are always 0.
+///
+/// \param __exponent The location of the exponent character.
+/// \param __num_trailing_zeros The number of 0's to write before the exponent
+/// character.
+template <class _CharT, class _ParserCharT>
+_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros(
+ const _CharT* __first,
+ const _CharT* __last,
+ output_iterator<const _CharT&> auto __out_it,
+ __format_spec::__parsed_specifications<_ParserCharT> __specs,
+ size_t __size,
+ const _CharT* __exponent,
+ size_t __num_trailing_zeros) -> decltype(__out_it) {
+ _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "Not a valid range");
+ _LIBCPP_ASSERT_UNCATEGORIZED(__num_trailing_zeros > 0,
+ "The overload not writing trailing zeros should have been used");
+
+ __padding_size_result __padding =
+ __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_);
+ __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
+ __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it));
+ __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0'));
+ __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it));
+ return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
+}
+
+
template <floating_point _Tp, class _CharT, class _FormatContext>
_LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
__format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) {
diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index fe2e1ee73c298c..54246aa027189e 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -203,6 +203,72 @@ consteval size_t __buffer_size() noexcept
+ 1; // Reserve space for the sign.
}
+template <class _OutIt, class _CharT>
+_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first,
+ const char* __last, string&& __grouping, _CharT __sep,
+ __format_spec::__parsed_specifications<_CharT> __specs) {
+ int __size = (__first - __begin) + // [sign][prefix]
+ (__last - __first) + // data
+ (__grouping.size() - 1); // number of separator characters
+
+ __padding_size_result __padding = {0, 0};
+ if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) {
+ // Write [sign][prefix].
+ __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
+
+ if (__specs.__width_ > __size) {
+ // Write zero padding.
+ __padding.__before_ = __specs.__width_ - __size;
+ __out_it = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0'));
+ }
+ } else {
+ if (__specs.__width_ > __size) {
+ // Determine padding and write padding.
+ __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_);
+
+ __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
+ }
+ // Write [sign][prefix].
+ __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
+ }
+
+ auto __r = __grouping.rbegin();
+ auto __e = __grouping.rend() - 1;
+ _LIBCPP_ASSERT_UNCATEGORIZED(__r != __e, "The slow grouping formatting is used while "
+ "there will be no separators written.");
+ // The output is divided in small groups of numbers to write:
+ // - A group before the first separator.
+ // - A separator and a group, repeated for the number of separators.
+ // - A group after the last separator.
+ // This loop achieves that process by testing the termination condition
+ // midway in the loop.
+ //
+ // TODO FMT This loop evaluates the loop invariant `__parser.__type !=
+ // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test
+ // happens in the __write call.) Benchmark whether making two loops and
+ // hoisting the invariant is worth the effort.
+ while (true) {
+ if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) {
+ __last = __first + *__r;
+ __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper);
+ __first = __last;
+ } else {
+ __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it));
+ __first += *__r;
+ }
+
+ if (__r == __e)
+ break;
+
+ ++__r;
+ *__out_it++ = __sep;
+ }
+
+ return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
+}
+
+
+
template <unsigned_integral _Tp, class _CharT, class _FormatContext>
_LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer(
_Tp __value,
diff --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h
index 6798c4b33f97d0..072305b6dbca10 100644
--- a/libcxx/include/__format/formatter_output.h
+++ b/libcxx/include/__format/formatter_output.h
@@ -12,29 +12,22 @@
#include <__algorithm/ranges_copy.h>
#include <__algorithm/ranges_fill_n.h>
-#include <__algorithm/ranges_for_each.h>
#include <__algorithm/ranges_transform.h>
#include <__bit/countl.h>
-#include <__charconv/to_chars_integral.h>
-#include <__charconv/to_chars_result.h>
-#include <__chrono/statically_widen.h>
#include <__concepts/same_as.h>
#include <__config>
#include <__format/buffer.h>
#include <__format/concepts.h>
-#include <__format/escaped_output_table.h>
#include <__format/formatter.h>
#include <__format/parser_std_format_spec.h>
#include <__format/unicode.h>
#include <__iterator/back_insert_iterator.h>
#include <__iterator/concepts.h>
#include <__iterator/iterator_traits.h> // iter_value_t
-#include <__system_error/errc.h>
-#include <__type_traits/make_unsigned.h>
+#include <__memory/addressof.h>
#include <__utility/move.h>
#include <__utility/unreachable.h>
#include <cstddef>
-#include <string>
#include <string_view>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -207,70 +200,6 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::
}
# endif // _LIBCPP_HAS_NO_UNICODE
-template <class _OutIt, class _CharT>
-_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first,
- const char* __last, string&& __grouping, _CharT __sep,
- __format_spec::__parsed_specifications<_CharT> __specs) {
- int __size = (__first - __begin) + // [sign][prefix]
- (__last - __first) + // data
- (__grouping.size() - 1); // number of separator characters
-
- __padding_size_result __padding = {0, 0};
- if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) {
- // Write [sign][prefix].
- __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
-
- if (__specs.__width_ > __size) {
- // Write zero padding.
- __padding.__before_ = __specs.__width_ - __size;
- __out_it = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0'));
- }
- } else {
- if (__specs.__width_ > __size) {
- // Determine padding and write padding.
- __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_);
-
- __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
- }
- // Write [sign][prefix].
- __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
- }
-
- auto __r = __grouping.rbegin();
- auto __e = __grouping.rend() - 1;
- _LIBCPP_ASSERT_UNCATEGORIZED(__r != __e, "The slow grouping formatting is used while "
- "there will be no separators written.");
- // The output is divided in small groups of numbers to write:
- // - A group before the first separator.
- // - A separator and a group, repeated for the number of separators.
- // - A group after the last separator.
- // This loop achieves that process by testing the termination condition
- // midway in the loop.
- //
- // TODO FMT This loop evaluates the loop invariant `__parser.__type !=
- // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test
- // happens in the __write call.) Benchmark whether making two loops and
- // hoisting the invariant is worth the effort.
- while (true) {
- if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) {
- __last = __first + *__r;
- __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper);
- __first = __last;
- } else {
- __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it));
- __first += *__r;
- }
-
- if (__r == __e)
- break;
-
- ++__r;
- *__out_it++ = __sep;
- }
-
- return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
-}
-
/// Writes the input to the output with the required padding.
///
/// Since the output column width is specified the function can be used for
@@ -348,36 +277,6 @@ _LIBCPP_HIDE_FROM_ABI auto __write_transformed(const _CharT* __first, const _Cha
return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
}
-/// Writes additional zero's for the precision before the exponent.
-/// This is used when the precision requested in the format string is larger
-/// than the maximum precision of the floating-point type. These precision
-/// digits are always 0.
-///
-/// \param __exponent The location of the exponent character.
-/// \param __num_trailing_zeros The number of 0's to write before the exponent
-/// character.
-template <class _CharT, class _ParserCharT>
-_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros(
- const _CharT* __first,
- const _CharT* __last,
- output_iterator<const _CharT&> auto __out_it,
- __format_spec::__parsed_specifications<_ParserCharT> __specs,
- size_t __size,
- const _CharT* __exponent,
- size_t __num_trailing_zeros) -> decltype(__out_it) {
- _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "Not a valid range");
- _LIBCPP_ASSERT_UNCATEGORIZED(__num_trailing_zeros > 0,
- "The overload not writing trailing zeros should have been used");
-
- __padding_size_result __padding =
- __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_);
- __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
- __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it));
- __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0'));
- __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it));
- return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
-}
-
/// Writes a string using format's width estimation algorithm.
///
/// \pre !__specs.__has_precision()
@@ -412,183 +311,6 @@ _LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __pre
return __result.__width_;
}
-/// Writes a string using format's width estimation algorithm.
-///
-/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
-/// input is ASCII.
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI auto __write_string(
- basic_string_view<_CharT> __str,
- output_iterator<const _CharT&> auto __out_it,
- __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
- if (!__specs.__has_precision())
- return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs);
-
- int __size = __formatter::__truncate(__str, __specs.__precision_);
-
- return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size);
-}
-
-# if _LIBCPP_STD_VER >= 23
-
-struct __nul_terminator {};
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
- return *__cstr == _CharT('\0');
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void
-__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
- back_insert_iterator __out_it{__str};
- std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
-
- char __buffer[8];
- to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
- _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small");
- std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
-
- __str += _CharT('}');
-}
-
-// [format.string.escaped]/2.2.1.2
-// ...
-// then the sequence \u{hex-digit-sequence} is appended to E, where
-// hex-digit-sequence is the shortest hexadecimal representation of C using
-// lower-case hexadecimal digits.
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
- __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
-}
-
-// [format.string.escaped]/2.2.3
-// Otherwise (X is a sequence of ill-formed code units), each code unit U is
-// appended to E in order as the sequence \x{hex-digit-sequence}, where
-// hex-digit-sequence is the shortest hexadecimal representation of U using
-// lower-case hexadecimal digits.
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
- __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
-}
-
-template <class _CharT>
-[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
-# ifdef _LIBCPP_HAS_NO_UNICODE
- // For ASCII assume everything above 127 is printable.
- if (__value > 127)
- return false;
-# endif
-
- if (!__escaped_output_table::__needs_escape(__value))
- return false;
-
- __formatter::__write_well_formed_escaped_code_unit(__str, __value);
- return true;
-}
-
-template <class _CharT>
-[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
- return static_cast<make_unsigned_t<_CharT>>(__value);
-}
-
-enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote };
-
-// [format.string.escaped]/2
-template <class _CharT>
-[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
-__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
- // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
- switch (__value) {
- case _CharT('\t'):
- __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
- return true;
- case _CharT('\n'):
- __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
- return true;
- case _CharT('\r'):
- __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
- return true;
- case _CharT('\''):
- if (__mark == __escape_quotation_mark::__apostrophe)
- __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
- else
- __str += __value;
- return true;
- case _CharT('"'):
- if (__mark == __escape_quotation_mark::__double_quote)
- __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
- else
- __str += __value;
- return true;
- case _CharT('\\'):
- __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
- return true;
-
- // 2.2.1.2 - Space
- case _CharT(' '):
- __str += __value;
- return true;
- }
-
- // 2.2.2
- // Otherwise, if X is a shift sequence, the effect on E and further
- // decoding of S is unspecified.
- // For now shift sequences are ignored and treated as Unicode. Other parts
- // of the format library do the same. It's unknown how ostream treats them.
- // TODO FMT determine what to do with shift sequences.
-
- // 2.2.1.2.1 and 2.2.1.2.2 - Escape
- return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void
-__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
- __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
-
- while (!__view.__at_end()) {
- auto __first = __view.__position();
- typename __unicode::__consume_result __result = __view.__consume();
- if (__result.__status == __unicode::__consume_result::__ok) {
- if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
- // 2.2.1.3 - Add the character
- ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
- } else {
- // 2.2.3 sequence of ill-formed code units
- ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
- __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
- });
- }
- }
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI auto
-__format_escaped_char(_CharT __value,
- output_iterator<const _CharT&> auto __out_it,
- __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
- basic_string<_CharT> __str;
- __str += _CharT('\'');
- __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
- __str += _CharT('\'');
- return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size());
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI auto
-__format_escaped_string(basic_string_view<_CharT> __values,
- output_iterator<const _CharT&> auto __out_it,
- __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
- basic_string<_CharT> __str;
- __str += _CharT('"');
- __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
- __str += _CharT('"');
- return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs);
-}
-
-# endif // _LIBCPP_STD_VER >= 23
-
} // namespace __formatter
#endif //_LIBCPP_STD_VER >= 20
diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
index 0894e46fbd1872..ba64a64af80a7c 100644
--- a/libcxx/include/__format/formatter_string.h
+++ b/libcxx/include/__format/formatter_string.h
@@ -17,6 +17,7 @@
#include <__format/formatter.h>
#include <__format/formatter_output.h>
#include <__format/parser_std_format_spec.h>
+#include <__format/write_escaped.h>
#include <string>
#include <string_view>
diff --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h
new file mode 100644
index 00000000000000..8c51d0b1f1484b
--- /dev/null
+++ b/libcxx/include/__format/write_escaped.h
@@ -0,0 +1,222 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
+#define _LIBCPP___FORMAT_WRITE_ESCAPED_H
+
+#include <__algorithm/ranges_copy.h>
+#include <__algorithm/ranges_for_each.h>
+#include <__charconv/to_chars_integral.h>
+#include <__charconv/to_chars_result.h>
+#include <__chrono/statically_widen.h>
+#include <__format/escaped_output_table.h>
+#include <__format/formatter_output.h>
+#include <__format/parser_std_format_spec.h>
+#include <__format/unicode.h>
+#include <__iterator/back_insert_iterator.h>
+#include <__memory/addressof.h>
+#include <__system_error/errc.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/move.h>
+#include <string_view>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+# pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+
+namespace __formatter {
+
+#if _LIBCPP_STD_VER >= 20
+
+/// Writes a string using format's width estimation algorithm.
+///
+/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
+/// input is ASCII.
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI auto __write_string(
+ basic_string_view<_CharT> __str,
+ output_iterator<const _CharT&> auto __out_it,
+ __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
+ if (!__specs.__has_precision())
+ return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs);
+
+ int __size = __formatter::__truncate(__str, __specs.__precision_);
+
+ return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size);
+}
+
+# endif // _LIBCPP_STD_VER >= 20
+# if _LIBCPP_STD_VER >= 23
+
+struct __nul_terminator {};
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
+ return *__cstr == _CharT('\0');
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void
+__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
+ back_insert_iterator __out_it{__str};
+ std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
+
+ char __buffer[8];
+ to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
+ _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small");
+ std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
+
+ __str += _CharT('}');
+}
+
+// [format.string.escaped]/2.2.1.2
+// ...
+// then the sequence \u{hex-digit-sequence} is appended to E, where
+// hex-digit-sequence is the shortest hexadecimal representation of C using
+// lower-case hexadecimal digits.
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
+ __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
+}
+
+// [format.string.escaped]/2.2.3
+// Otherwise (X is a sequence of ill-formed code units), each code unit U is
+// appended to E in order as the sequence \x{hex-digit-sequence}, where
+// hex-digit-sequence is the shortest hexadecimal representation of U using
+// lower-case hexadecimal digits.
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
+ __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
+}
+
+template <class _CharT>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
+# ifdef _LIBCPP_HAS_NO_UNICODE
+ // For ASCII assume everything above 127 is printable.
+ if (__value > 127)
+ return false;
+# endif
+
+ if (!__escaped_output_table::__needs_escape(__value))
+ return false;
+
+ __formatter::__write_well_formed_escaped_code_unit(__str, __value);
+ return true;
+}
+
+template <class _CharT>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
+ return static_cast<make_unsigned_t<_CharT>>(__value);
+}
+
+enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote };
+
+// [format.string.escaped]/2
+template <class _CharT>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
+__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
+ // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
+ switch (__value) {
+ case _CharT('\t'):
+ __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
+ return true;
+ case _CharT('\n'):
+ __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
+ return true;
+ case _CharT('\r'):
+ __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
+ return true;
+ case _CharT('\''):
+ if (__mark == __escape_quotation_mark::__apostrophe)
+ __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
+ else
+ __str += __value;
+ return true;
+ case _CharT('"'):
+ if (__mark == __escape_quotation_mark::__double_quote)
+ __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
+ else
+ __str += __value;
+ return true;
+ case _CharT('\\'):
+ __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
+ return true;
+
+ // 2.2.1.2 - Space
+ case _CharT(' '):
+ __str += __value;
+ return true;
+ }
+
+ // 2.2.2
+ // Otherwise, if X is a shift sequence, the effect on E and further
+ // decoding of S is unspecified.
+ // For now shift sequences are ignored and treated as Unicode. Other parts
+ // of the format library do the same. It's unknown how ostream treats them.
+ // TODO FMT determine what to do with shift sequences.
+
+ // 2.2.1.2.1 and 2.2.1.2.2 - Escape
+ return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void
+__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
+ __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
+
+ while (!__view.__at_end()) {
+ auto __first = __view.__position();
+ typename __unicode::__consume_result __result = __view.__consume();
+ if (__result.__status == __unicode::__consume_result::__ok) {
+ if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
+ // 2.2.1.3 - Add the character
+ ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
+ } else {
+ // 2.2.3 sequence of ill-formed code units
+ ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
+ __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
+ });
+ }
+ }
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI auto
+__format_escaped_char(_CharT __value,
+ output_iterator<const _CharT&> auto __out_it,
+ __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
+ basic_string<_CharT> __str;
+ __str += _CharT('\'');
+ __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
+ __str += _CharT('\'');
+ return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size());
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI auto
+__format_escaped_string(basic_string_view<_CharT> __values,
+ output_iterator<const _CharT&> auto __out_it,
+ __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
+ basic_string<_CharT> __str;
+ __str += _CharT('"');
+ __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
+ __str += _CharT('"');
+ return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs);
+}
+
+# endif // _LIBCPP_STD_VER >= 23
+
+} // namespace __formatter
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index a9cebf891deb04..49d3f122d8c5d1 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -1015,6 +1015,7 @@ module std [system] {
module range_formatter { private header "__format/range_formatter.h" }
module unicode { private header "__format/unicode.h" }
module width_estimation_table { private header "__format/width_estimation_table.h" }
+ module write_escaped { private header "__format/write_escaped.h" }
}
}
module forward_list {
diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt
index 00b1f7408d0aad..f19302bb08090c 100644
--- a/libcxx/utils/data/ignore_format.txt
+++ b/libcxx/utils/data/ignore_format.txt
@@ -215,6 +215,7 @@ libcxx/include/__format/formatter_pointer.h
libcxx/include/__format/formatter_string.h
libcxx/include/__format/parser_std_format_spec.h
libcxx/include/__format/unicode.h
+libcxx/include/__format/write_escaped.h
libcxx/include/forward_list
libcxx/include/fstream
libcxx/include/__functional/binary_function.h
More information about the libcxx-commits
mailing list