[libcxx-commits] [libcxx] 3ab20c6 - [libc++][format] Granularize formatter_output.

Mark de Wever via libcxx-commits libcxx-commits at lists.llvm.org
Mon Jul 10 10:30:31 PDT 2023


Author: Mark de Wever
Date: 2023-07-10T19:30:26+02:00
New Revision: 3ab20c6809d4b5d35d3538422b60ea0abd527a47

URL: https://github.com/llvm/llvm-project/commit/3ab20c6809d4b5d35d3538422b60ea0abd527a47
DIFF: https://github.com/llvm/llvm-project/commit/3ab20c6809d4b5d35d3538422b60ea0abd527a47.diff

LOG: [libc++][format] Granularize formatter_output.

This should reduce the size of the transitive includes for the vector header.
Note that the header is still quite large, so the difference may be small.

Depends on D154122

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D154286

Added: 
    libcxx/include/__format/write_escaped.h

Modified: 
    libcxx/include/CMakeLists.txt
    libcxx/include/__chrono/formatter.h
    libcxx/include/__format/formatter_char.h
    libcxx/include/__format/formatter_floating_point.h
    libcxx/include/__format/formatter_integral.h
    libcxx/include/__format/formatter_output.h
    libcxx/include/__format/formatter_string.h
    libcxx/include/module.modulemap.in
    libcxx/utils/data/ignore_format.txt

Removed: 
    


################################################################################
diff  --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 02536914b9bdfd..0a14e89ee13baf 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -384,6 +384,7 @@ set(files
   __format/range_formatter.h
   __format/unicode.h
   __format/width_estimation_table.h
+  __format/write_escaped.h
   __functional/binary_function.h
   __functional/binary_negate.h
   __functional/bind.h

diff  --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h
index 8274b0c05d4b34..2de6e1420d1272 100644
--- a/libcxx/include/__chrono/formatter.h
+++ b/libcxx/include/__chrono/formatter.h
@@ -38,8 +38,8 @@
 #include <__format/format_functions.h>
 #include <__format/format_parse_context.h>
 #include <__format/formatter.h>
-#include <__format/formatter_output.h>
 #include <__format/parser_std_format_spec.h>
+#include <__format/write_escaped.h>
 #include <__memory/addressof.h>
 #include <cmath>
 #include <ctime>

diff  --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h
index 15a649807b73f3..8b1b357e160342 100644
--- a/libcxx/include/__format/formatter_char.h
+++ b/libcxx/include/__format/formatter_char.h
@@ -19,6 +19,7 @@
 #include <__format/formatter_integral.h>
 #include <__format/formatter_output.h>
 #include <__format/parser_std_format_spec.h>
+#include <__format/write_escaped.h>
 #include <__type_traits/conditional.h>
 #include <__type_traits/is_signed.h>
 

diff  --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
index 4fd01f70aea133..e01d7b8d7bd0c8 100644
--- a/libcxx/include/__format/formatter_floating_point.h
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -28,6 +28,7 @@
 #include <__format/formatter_integral.h>
 #include <__format/formatter_output.h>
 #include <__format/parser_std_format_spec.h>
+#include <__iterator/concepts.h>
 #include <__memory/allocator.h>
 #include <__system_error/errc.h>
 #include <__type_traits/conditional.h>
@@ -608,6 +609,37 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __format_floating_point_non_finite(
   return __formatter::__write(__buffer, __last, _VSTD::move(__out_it), __specs);
 }
 
+/// Writes additional zeros for the precision before the exponent.
+/// This is used when the precision requested in the format string is larger
+/// than the maximum precision of the floating-point type. These precision
+/// digits are always 0.
+///
+/// \param __exponent           The location of the exponent character.
+/// \param __num_trailing_zeros The number of 0's to write before the exponent
+///                             character.
+template <class _CharT, class _ParserCharT>
+_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros(
+    const _CharT* __first,
+    const _CharT* __last,
+    output_iterator<const _CharT&> auto __out_it,
+    __format_spec::__parsed_specifications<_ParserCharT> __specs,
+    size_t __size,
+    const _CharT* __exponent,
+    size_t __num_trailing_zeros) -> decltype(__out_it) {
+  _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "Not a valid range");
+  _LIBCPP_ASSERT_UNCATEGORIZED(__num_trailing_zeros > 0,
+                               "The overload not writing trailing zeros should have been used");
+
+  __padding_size_result __padding =
+      __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_);
+  __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
+  __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it));
+  __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0'));
+  __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it));
+  return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
+}
+
+
 template <floating_point _Tp, class _CharT, class _FormatContext>
 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator
 __format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) {

diff  --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index fe2e1ee73c298c..54246aa027189e 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -203,6 +203,72 @@ consteval size_t __buffer_size() noexcept
        + 1;                          // Reserve space for the sign.
 }
 
+template <class _OutIt, class _CharT>
+_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first,
+                                                              const char* __last, string&& __grouping, _CharT __sep,
+                                                              __format_spec::__parsed_specifications<_CharT> __specs) {
+  int __size = (__first - __begin) +    // [sign][prefix]
+               (__last - __first) +     // data
+               (__grouping.size() - 1); // number of separator characters
+
+  __padding_size_result __padding = {0, 0};
+  if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) {
+    // Write [sign][prefix].
+    __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
+
+    if (__specs.__width_ > __size) {
+      // Write zero padding.
+      __padding.__before_ = __specs.__width_ - __size;
+      __out_it            = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0'));
+    }
+  } else {
+    if (__specs.__width_ > __size) {
+      // Determine padding and write padding.
+      __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_);
+
+      __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
+    }
+    // Write [sign][prefix].
+    __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
+  }
+
+  auto __r = __grouping.rbegin();
+  auto __e = __grouping.rend() - 1;
+  _LIBCPP_ASSERT_UNCATEGORIZED(__r != __e, "The slow grouping formatting is used while "
+                                           "there will be no separators written.");
+  // The output is divided in small groups of numbers to write:
+  // - A group before the first separator.
+  // - A separator and a group, repeated for the number of separators.
+  // - A group after the last separator.
+  // This loop achieves that process by testing the termination condition
+  // midway in the loop.
+  //
+  // TODO FMT This loop evaluates the loop-invariant condition
+  // `__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case`
+  // on every iteration. Benchmark whether making two loops and
+  // hoisting the invariant is worth the effort.
+  while (true) {
+    if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) {
+      __last = __first + *__r;
+      __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper);
+      __first = __last;
+    } else {
+      __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it));
+      __first += *__r;
+    }
+
+    if (__r == __e)
+      break;
+
+    ++__r;
+    *__out_it++ = __sep;
+  }
+
+  return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
+}
+
+
+
 template <unsigned_integral _Tp, class _CharT, class _FormatContext>
 _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer(
     _Tp __value,

diff  --git a/libcxx/include/__format/formatter_output.h b/libcxx/include/__format/formatter_output.h
index 6798c4b33f97d0..072305b6dbca10 100644
--- a/libcxx/include/__format/formatter_output.h
+++ b/libcxx/include/__format/formatter_output.h
@@ -12,29 +12,22 @@
 
 #include <__algorithm/ranges_copy.h>
 #include <__algorithm/ranges_fill_n.h>
-#include <__algorithm/ranges_for_each.h>
 #include <__algorithm/ranges_transform.h>
 #include <__bit/countl.h>
-#include <__charconv/to_chars_integral.h>
-#include <__charconv/to_chars_result.h>
-#include <__chrono/statically_widen.h>
 #include <__concepts/same_as.h>
 #include <__config>
 #include <__format/buffer.h>
 #include <__format/concepts.h>
-#include <__format/escaped_output_table.h>
 #include <__format/formatter.h>
 #include <__format/parser_std_format_spec.h>
 #include <__format/unicode.h>
 #include <__iterator/back_insert_iterator.h>
 #include <__iterator/concepts.h>
 #include <__iterator/iterator_traits.h> // iter_value_t
-#include <__system_error/errc.h>
-#include <__type_traits/make_unsigned.h>
+#include <__memory/addressof.h>
 #include <__utility/move.h>
 #include <__utility/unreachable.h>
 #include <cstddef>
-#include <string>
 #include <string_view>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -207,70 +200,6 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::
 }
 #  endif   // _LIBCPP_HAS_NO_UNICODE
 
-template <class _OutIt, class _CharT>
-_LIBCPP_HIDE_FROM_ABI _OutIt __write_using_decimal_separators(_OutIt __out_it, const char* __begin, const char* __first,
-                                                              const char* __last, string&& __grouping, _CharT __sep,
-                                                              __format_spec::__parsed_specifications<_CharT> __specs) {
-  int __size = (__first - __begin) +    // [sign][prefix]
-               (__last - __first) +     // data
-               (__grouping.size() - 1); // number of separator characters
-
-  __padding_size_result __padding = {0, 0};
-  if (__specs.__alignment_ == __format_spec::__alignment::__zero_padding) {
-    // Write [sign][prefix].
-    __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
-
-    if (__specs.__width_ > __size) {
-      // Write zero padding.
-      __padding.__before_ = __specs.__width_ - __size;
-      __out_it            = __formatter::__fill(_VSTD::move(__out_it), __specs.__width_ - __size, _CharT('0'));
-    }
-  } else {
-    if (__specs.__width_ > __size) {
-      // Determine padding and write padding.
-      __padding = __formatter::__padding_size(__size, __specs.__width_, __specs.__alignment_);
-
-      __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
-    }
-    // Write [sign][prefix].
-    __out_it = __formatter::__copy(__begin, __first, _VSTD::move(__out_it));
-  }
-
-  auto __r = __grouping.rbegin();
-  auto __e = __grouping.rend() - 1;
-  _LIBCPP_ASSERT_UNCATEGORIZED(__r != __e, "The slow grouping formatting is used while "
-                                           "there will be no separators written.");
-  // The output is divided in small groups of numbers to write:
-  // - A group before the first separator.
-  // - A separator and a group, repeated for the number of separators.
-  // - A group after the last separator.
-  // This loop achieves that process by testing the termination condition
-  // midway in the loop.
-  //
-  // TODO FMT This loop evaluates the loop invariant `__parser.__type !=
-  // _Flags::_Type::__hexadecimal_upper_case` for every iteration. (This test
-  // happens in the __write call.) Benchmark whether making two loops and
-  // hoisting the invariant is worth the effort.
-  while (true) {
-    if (__specs.__std_.__type_ == __format_spec::__type::__hexadecimal_upper_case) {
-      __last = __first + *__r;
-      __out_it = __formatter::__transform(__first, __last, _VSTD::move(__out_it), __hex_to_upper);
-      __first = __last;
-    } else {
-      __out_it = __formatter::__copy(__first, *__r, _VSTD::move(__out_it));
-      __first += *__r;
-    }
-
-    if (__r == __e)
-      break;
-
-    ++__r;
-    *__out_it++ = __sep;
-  }
-
-  return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
-}
-
 /// Writes the input to the output with the required padding.
 ///
 /// Since the output column width is specified the function can be used for
@@ -348,36 +277,6 @@ _LIBCPP_HIDE_FROM_ABI auto __write_transformed(const _CharT* __first, const _Cha
   return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
 }
 
-/// Writes additional zero's for the precision before the exponent.
-/// This is used when the precision requested in the format string is larger
-/// than the maximum precision of the floating-point type. These precision
-/// digits are always 0.
-///
-/// \param __exponent           The location of the exponent character.
-/// \param __num_trailing_zeros The number of 0's to write before the exponent
-///                             character.
-template <class _CharT, class _ParserCharT>
-_LIBCPP_HIDE_FROM_ABI auto __write_using_trailing_zeros(
-    const _CharT* __first,
-    const _CharT* __last,
-    output_iterator<const _CharT&> auto __out_it,
-    __format_spec::__parsed_specifications<_ParserCharT> __specs,
-    size_t __size,
-    const _CharT* __exponent,
-    size_t __num_trailing_zeros) -> decltype(__out_it) {
-  _LIBCPP_ASSERT_UNCATEGORIZED(__first <= __last, "Not a valid range");
-  _LIBCPP_ASSERT_UNCATEGORIZED(__num_trailing_zeros > 0,
-                               "The overload not writing trailing zeros should have been used");
-
-  __padding_size_result __padding =
-      __formatter::__padding_size(__size + __num_trailing_zeros, __specs.__width_, __specs.__alignment_);
-  __out_it = __formatter::__fill(_VSTD::move(__out_it), __padding.__before_, __specs.__fill_);
-  __out_it = __formatter::__copy(__first, __exponent, _VSTD::move(__out_it));
-  __out_it = __formatter::__fill(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0'));
-  __out_it = __formatter::__copy(__exponent, __last, _VSTD::move(__out_it));
-  return __formatter::__fill(_VSTD::move(__out_it), __padding.__after_, __specs.__fill_);
-}
-
 /// Writes a string using format's width estimation algorithm.
 ///
 /// \pre !__specs.__has_precision()
@@ -412,183 +311,6 @@ _LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __pre
   return __result.__width_;
 }
 
-/// Writes a string using format's width estimation algorithm.
-///
-/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
-/// input is ASCII.
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI auto __write_string(
-    basic_string_view<_CharT> __str,
-    output_iterator<const _CharT&> auto __out_it,
-    __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
-  if (!__specs.__has_precision())
-    return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs);
-
-  int __size = __formatter::__truncate(__str, __specs.__precision_);
-
-  return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size);
-}
-
-#  if _LIBCPP_STD_VER >= 23
-
-struct __nul_terminator {};
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
-  return *__cstr == _CharT('\0');
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void
-__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
-  back_insert_iterator __out_it{__str};
-  std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
-
-  char __buffer[8];
-  to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
-  _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small");
-  std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
-
-  __str += _CharT('}');
-}
-
-// [format.string.escaped]/2.2.1.2
-// ...
-// then the sequence \u{hex-digit-sequence} is appended to E, where
-// hex-digit-sequence is the shortest hexadecimal representation of C using
-// lower-case hexadecimal digits.
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
-  __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
-}
-
-// [format.string.escaped]/2.2.3
-// Otherwise (X is a sequence of ill-formed code units), each code unit U is
-// appended to E in order as the sequence \x{hex-digit-sequence}, where
-// hex-digit-sequence is the shortest hexadecimal representation of U using
-// lower-case hexadecimal digits.
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
-  __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
-}
-
-template <class _CharT>
-[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
-#    ifdef _LIBCPP_HAS_NO_UNICODE
-  // For ASCII assume everything above 127 is printable.
-  if (__value > 127)
-    return false;
-#    endif
-
-  if (!__escaped_output_table::__needs_escape(__value))
-    return false;
-
-  __formatter::__write_well_formed_escaped_code_unit(__str, __value);
-  return true;
-}
-
-template <class _CharT>
-[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
-  return static_cast<make_unsigned_t<_CharT>>(__value);
-}
-
-enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote };
-
-// [format.string.escaped]/2
-template <class _CharT>
-[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
-__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
-  // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
-  switch (__value) {
-  case _CharT('\t'):
-    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
-    return true;
-  case _CharT('\n'):
-    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
-    return true;
-  case _CharT('\r'):
-    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
-    return true;
-  case _CharT('\''):
-    if (__mark == __escape_quotation_mark::__apostrophe)
-      __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
-    else
-      __str += __value;
-    return true;
-  case _CharT('"'):
-    if (__mark == __escape_quotation_mark::__double_quote)
-      __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
-    else
-      __str += __value;
-    return true;
-  case _CharT('\\'):
-    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
-    return true;
-
-  // 2.2.1.2 - Space
-  case _CharT(' '):
-    __str += __value;
-    return true;
-  }
-
-  // 2.2.2
-  //   Otherwise, if X is a shift sequence, the effect on E and further
-  //   decoding of S is unspecified.
-  // For now shift sequences are ignored and treated as Unicode. Other parts
-  // of the format library do the same. It's unknown how ostream treats them.
-  // TODO FMT determine what to do with shift sequences.
-
-  // 2.2.1.2.1 and 2.2.1.2.2 - Escape
-  return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI void
-__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
-  __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
-
-  while (!__view.__at_end()) {
-    auto __first                                  = __view.__position();
-    typename __unicode::__consume_result __result = __view.__consume();
-    if (__result.__status == __unicode::__consume_result::__ok) {
-      if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
-        // 2.2.1.3 - Add the character
-        ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
-    } else {
-      // 2.2.3 sequence of ill-formed code units
-      ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
-        __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
-      });
-    }
-  }
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI auto
-__format_escaped_char(_CharT __value,
-                      output_iterator<const _CharT&> auto __out_it,
-                      __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
-  basic_string<_CharT> __str;
-  __str += _CharT('\'');
-  __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
-  __str += _CharT('\'');
-  return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size());
-}
-
-template <class _CharT>
-_LIBCPP_HIDE_FROM_ABI auto
-__format_escaped_string(basic_string_view<_CharT> __values,
-                        output_iterator<const _CharT&> auto __out_it,
-                        __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
-  basic_string<_CharT> __str;
-  __str += _CharT('"');
-  __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
-  __str += _CharT('"');
-  return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs);
-}
-
-#  endif // _LIBCPP_STD_VER >= 23
-
 } // namespace __formatter
 
 #endif //_LIBCPP_STD_VER >= 20

diff  --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
index 0894e46fbd1872..ba64a64af80a7c 100644
--- a/libcxx/include/__format/formatter_string.h
+++ b/libcxx/include/__format/formatter_string.h
@@ -17,6 +17,7 @@
 #include <__format/formatter.h>
 #include <__format/formatter_output.h>
 #include <__format/parser_std_format_spec.h>
+#include <__format/write_escaped.h>
 #include <string>
 #include <string_view>
 

diff  --git a/libcxx/include/__format/write_escaped.h b/libcxx/include/__format/write_escaped.h
new file mode 100644
index 00000000000000..8c51d0b1f1484b
--- /dev/null
+++ b/libcxx/include/__format/write_escaped.h
@@ -0,0 +1,222 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
+#define _LIBCPP___FORMAT_WRITE_ESCAPED_H
+
+#include <__algorithm/ranges_copy.h>
+#include <__algorithm/ranges_for_each.h>
+#include <__charconv/to_chars_integral.h>
+#include <__charconv/to_chars_result.h>
+#include <__chrono/statically_widen.h>
+#include <__format/escaped_output_table.h>
+#include <__format/formatter_output.h>
+#include <__format/parser_std_format_spec.h>
+#include <__format/unicode.h>
+#include <__iterator/back_insert_iterator.h>
+#include <__memory/addressof.h>
+#include <__system_error/errc.h>
+#include <__type_traits/make_unsigned.h>
+#include <__utility/move.h>
+#include <string_view>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+
+namespace __formatter {
+
+#if _LIBCPP_STD_VER >= 20
+
+/// Writes a string using format's width estimation algorithm.
+///
+/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
+/// input is ASCII.
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI auto __write_string(
+    basic_string_view<_CharT> __str,
+    output_iterator<const _CharT&> auto __out_it,
+    __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
+  if (!__specs.__has_precision())
+    return __formatter::__write_string_no_precision(__str, _VSTD::move(__out_it), __specs);
+
+  int __size = __formatter::__truncate(__str, __specs.__precision_);
+
+  return __formatter::__write(__str.begin(), __str.end(), _VSTD::move(__out_it), __specs, __size);
+}
+
+#  endif // _LIBCPP_STD_VER >= 20
+# if _LIBCPP_STD_VER >= 23
+
+struct __nul_terminator {};
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
+  return *__cstr == _CharT('\0');
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void
+__write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
+  back_insert_iterator __out_it{__str};
+  std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
+
+  char __buffer[8];
+  to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
+  _LIBCPP_ASSERT_UNCATEGORIZED(__r.ec == errc(0), "Internal buffer too small");
+  std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
+
+  __str += _CharT('}');
+}
+
+// [format.string.escaped]/2.2.1.2
+// ...
+// then the sequence \u{hex-digit-sequence} is appended to E, where
+// hex-digit-sequence is the shortest hexadecimal representation of C using
+// lower-case hexadecimal digits.
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
+  __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
+}
+
+// [format.string.escaped]/2.2.3
+// Otherwise (X is a sequence of ill-formed code units), each code unit U is
+// appended to E in order as the sequence \x{hex-digit-sequence}, where
+// hex-digit-sequence is the shortest hexadecimal representation of U using
+// lower-case hexadecimal digits.
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
+  __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
+}
+
+template <class _CharT>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value) {
+#    ifdef _LIBCPP_HAS_NO_UNICODE
+  // For ASCII assume everything above 127 is printable.
+  if (__value > 127)
+    return false;
+#    endif
+
+  if (!__escaped_output_table::__needs_escape(__value))
+    return false;
+
+  __formatter::__write_well_formed_escaped_code_unit(__str, __value);
+  return true;
+}
+
+template <class _CharT>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
+  return static_cast<make_unsigned_t<_CharT>>(__value);
+}
+
+enum class _LIBCPP_ENUM_VIS __escape_quotation_mark { __apostrophe, __double_quote };
+
+// [format.string.escaped]/2
+template <class _CharT>
+[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
+__is_escaped_sequence_written(basic_string<_CharT>& __str, char32_t __value, __escape_quotation_mark __mark) {
+  // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
+  switch (__value) {
+  case _CharT('\t'):
+    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
+    return true;
+  case _CharT('\n'):
+    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
+    return true;
+  case _CharT('\r'):
+    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
+    return true;
+  case _CharT('\''):
+    if (__mark == __escape_quotation_mark::__apostrophe)
+      __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
+    else
+      __str += __value;
+    return true;
+  case _CharT('"'):
+    if (__mark == __escape_quotation_mark::__double_quote)
+      __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
+    else
+      __str += __value;
+    return true;
+  case _CharT('\\'):
+    __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
+    return true;
+
+  // 2.2.1.2 - Space
+  case _CharT(' '):
+    __str += __value;
+    return true;
+  }
+
+  // 2.2.2
+  //   Otherwise, if X is a shift sequence, the effect on E and further
+  //   decoding of S is unspecified.
+  // For now shift sequences are ignored and treated as Unicode. Other parts
+  // of the format library do the same. It's unknown how ostream treats them.
+  // TODO FMT determine what to do with shift sequences.
+
+  // 2.2.1.2.1 and 2.2.1.2.2 - Escape
+  return __formatter::__is_escaped_sequence_written(__str, __formatter::__to_char32(__value));
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI void
+__escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
+  __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
+
+  while (!__view.__at_end()) {
+    auto __first                                  = __view.__position();
+    typename __unicode::__consume_result __result = __view.__consume();
+    if (__result.__status == __unicode::__consume_result::__ok) {
+      if (!__formatter::__is_escaped_sequence_written(__str, __result.__code_point, __mark))
+        // 2.2.1.3 - Add the character
+        ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
+    } else {
+      // 2.2.3 sequence of ill-formed code units
+      ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
+        __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
+      });
+    }
+  }
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI auto
+__format_escaped_char(_CharT __value,
+                      output_iterator<const _CharT&> auto __out_it,
+                      __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
+  basic_string<_CharT> __str;
+  __str += _CharT('\'');
+  __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
+  __str += _CharT('\'');
+  return __formatter::__write(__str.data(), __str.data() + __str.size(), _VSTD::move(__out_it), __specs, __str.size());
+}
+
+template <class _CharT>
+_LIBCPP_HIDE_FROM_ABI auto
+__format_escaped_string(basic_string_view<_CharT> __values,
+                        output_iterator<const _CharT&> auto __out_it,
+                        __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
+  basic_string<_CharT> __str;
+  __str += _CharT('"');
+  __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
+  __str += _CharT('"');
+  return __formatter::__write_string(basic_string_view{__str}, _VSTD::move(__out_it), __specs);
+}
+
+#  endif // _LIBCPP_STD_VER >= 23
+
+} // namespace __formatter
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H

diff  --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index a9cebf891deb04..49d3f122d8c5d1 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ -1015,6 +1015,7 @@ module std [system] {
       module range_formatter                 { private header "__format/range_formatter.h" }
       module unicode                         { private header "__format/unicode.h" }
       module width_estimation_table          { private header "__format/width_estimation_table.h" }
+      module write_escaped                   { private header "__format/write_escaped.h" }
     }
   }
   module forward_list {

diff  --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt
index 00b1f7408d0aad..f19302bb08090c 100644
--- a/libcxx/utils/data/ignore_format.txt
+++ b/libcxx/utils/data/ignore_format.txt
@@ -215,6 +215,7 @@ libcxx/include/__format/formatter_pointer.h
 libcxx/include/__format/formatter_string.h
 libcxx/include/__format/parser_std_format_spec.h
 libcxx/include/__format/unicode.h
+libcxx/include/__format/write_escaped.h
 libcxx/include/forward_list
 libcxx/include/fstream
 libcxx/include/__functional/binary_function.h


        


More information about the libcxx-commits mailing list