[libcxx-commits] [libcxx] 555214c - [libc++][format][2/6] Adds a __output_iterator.

Mark de Wever via libcxx-commits libcxx-commits at lists.llvm.org
Sat Mar 26 08:48:07 PDT 2022


Author: Mark de Wever
Date: 2022-03-26T16:48:01+01:00
New Revision: 555214cbcc79eac401482e1a302f726b54182546

URL: https://github.com/llvm/llvm-project/commit/555214cbcc79eac401482e1a302f726b54182546
DIFF: https://github.com/llvm/llvm-project/commit/555214cbcc79eac401482e1a302f726b54182546.diff

LOG: [libc++][format][2/6] Adds a __output_iterator.

Instead of using a temporary `string` in `__vformat_to_wrapped` use a new
generic iterator. This aids to reduce the number of template instantions
and avoids using a `string` to buffer the entire formatted output.

This changes the type of `format_context` and `wformat_context`, this can
still be done since the code isn't ABI stable yet.

Several approaches have been evaluated:
- Using a __output_buffer base class with:
  - a put function to store the buffer in its internal buffer
  - a virtual flush function to copy the internal buffer to the output
- Using a `function` to forward the output operation to the output buffer,
  much like the next method.
- Using a type erased function point to store the data in the buffer.
The last version resulted in the best performance. For some cases there's
still a loss of speed over the original method. This loss many becomes
apparent when large strings are copied to a pointer like iterator, before
the compiler optimized this using `memcpy`.

Reviewed By: ldionne, vitaut, #libc

Differential Revision: https://reviews.llvm.org/D110495

Added: 
    libcxx/include/__format/buffer.h
    libcxx/test/libcxx/diagnostics/detail.headers/format/buffer.module.verify.cpp
    libcxx/test/libcxx/utilities/format/format.formatter/format.context/types.compile.pass.cpp

Modified: 
    libcxx/include/CMakeLists.txt
    libcxx/include/__format/format_context.h
    libcxx/include/format
    libcxx/include/module.modulemap

Removed: 
    libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp


################################################################################
diff  --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 090adf0d47031..c4ee88945478f 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -184,6 +184,7 @@ set(files
   __filesystem/recursive_directory_iterator.h
   __filesystem/space_info.h
   __filesystem/u8path.h
+  __format/buffer.h
   __format/format_arg.h
   __format/format_args.h
   __format/format_context.h

diff  --git a/libcxx/include/__format/buffer.h b/libcxx/include/__format/buffer.h
new file mode 100644
index 0000000000000..32513d0350a63
--- /dev/null
+++ b/libcxx/include/__format/buffer.h
@@ -0,0 +1,207 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_BUFFER_H
+#define _LIBCPP___FORMAT_BUFFER_H
+
+#include <__algorithm/copy_n.h>
+#include <__algorithm/unwrap_iter.h>
+#include <__config>
+#include <__format/formatter.h> // for __char_type TODO FMT Move the concept?
+#include <__iterator/back_insert_iterator.h>
+#include <__iterator/concepts.h>
+#include <__iterator/iterator_traits.h>
+#include <__iterator/wrap_iter.h>
+#include <__utility/move.h>
+#include <concepts>
+#include <cstddef>
+#include <type_traits>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+namespace __format {
+
+/// A "buffer" that handles writing to the proper iterator.
+///
+/// This helper is used together with the @ref back_insert_iterator to offer
+/// type-erasure for the formatting functions. This reduces the number to
+/// template instantiations.
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __output_buffer {
+public:
+  using value_type = _CharT;
+
+  template <class _Tp>
+  _LIBCPP_HIDE_FROM_ABI explicit __output_buffer(_CharT* __ptr,
+                                                 size_t __capacity, _Tp* __obj)
+      : __ptr_(__ptr), __capacity_(__capacity),
+        __flush_([](_CharT* __p, size_t __size, void* __o) {
+          static_cast<_Tp*>(__o)->flush(__p, __size);
+        }),
+        __obj_(__obj) {}
+
+  _LIBCPP_HIDE_FROM_ABI void reset(_CharT* __ptr, size_t __capacity) {
+    __ptr_ = __ptr;
+    __capacity_ = __capacity;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI auto make_output_iterator() {
+    return back_insert_iterator{*this};
+  }
+
+  // TODO FMT It would be nice to have an overload taking a
+  // basic_string_view<_CharT> and append it directly.
+  _LIBCPP_HIDE_FROM_ABI void push_back(_CharT __c) {
+    __ptr_[__size_++] = __c;
+
+    // Profiling showed flushing after adding is more efficient than flushing
+    // when entering the function.
+    if (__size_ == __capacity_)
+      flush();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void flush() {
+    __flush_(__ptr_, __size_, __obj_);
+    __size_ = 0;
+  }
+
+private:
+  _CharT* __ptr_;
+  size_t __capacity_;
+  size_t __size_{0};
+  void (*__flush_)(_CharT*, size_t, void*);
+  void* __obj_;
+};
+
+/// A storage using an internal buffer.
+///
+/// This storage is used when writing a single element to the output iterator
+/// is expensive.
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __internal_storage {
+public:
+  _LIBCPP_HIDE_FROM_ABI _CharT* begin() { return __buffer_; }
+  _LIBCPP_HIDE_FROM_ABI size_t capacity() { return __buffer_size_; }
+
+private:
+  static constexpr size_t __buffer_size_ = 256 / sizeof(_CharT);
+  _CharT __buffer_[__buffer_size_];
+};
+
+/// A storage writing directly to the storage.
+///
+/// This requires the storage to be a contiguous buffer of \a _CharT.
+/// Since the output is directly written to the underlying storage this class
+/// is just an empty class.
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __direct_storage {};
+
+template <class _OutIt, class _CharT>
+concept __enable_direct_output = __formatter::__char_type<_CharT> &&
+    (same_as<_OutIt, _CharT*>
+#if _LIBCPP_DEBUG_LEVEL < 2
+     || same_as<_OutIt, __wrap_iter<_CharT*>>
+#endif
+    );
+
+/// Write policy for directly writing to the underlying output.
+template <class _OutIt, __formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __writer_direct {
+public:
+  _LIBCPP_HIDE_FROM_ABI explicit __writer_direct(_OutIt __out_it)
+      : __out_it_(__out_it) {}
+
+  _LIBCPP_HIDE_FROM_ABI auto out() { return __out_it_; }
+
+  _LIBCPP_HIDE_FROM_ABI void flush(_CharT*, size_t __size) {
+    // _OutIt can be a __wrap_iter<CharT*>. Therefore the original iterator
+    // is adjusted.
+    __out_it_ += __size;
+  }
+
+private:
+  _OutIt __out_it_;
+};
+
+/// Write policy for copying the buffer to the output.
+template <class _OutIt, __formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __writer_iterator {
+public:
+  _LIBCPP_HIDE_FROM_ABI explicit __writer_iterator(_OutIt __out_it)
+      : __out_it_{_VSTD::move(__out_it)} {}
+
+  _LIBCPP_HIDE_FROM_ABI auto out() { return __out_it_; }
+
+  _LIBCPP_HIDE_FROM_ABI void flush(_CharT* __ptr, size_t __size) {
+    __out_it_ = _VSTD::copy_n(__ptr, __size, _VSTD::move(__out_it_));
+  }
+
+private:
+  _OutIt __out_it_;
+};
+
+/// Selects the type of the writer used for the output iterator.
+template <class _OutIt, class _CharT>
+class _LIBCPP_TEMPLATE_VIS __writer_selector {
+public:
+  using type = conditional_t<__enable_direct_output<_OutIt, _CharT>,
+                             __writer_direct<_OutIt, _CharT>,
+                             __writer_iterator<_OutIt, _CharT>>;
+};
+
+/// The generic formatting buffer.
+template <class _OutIt, __formatter::__char_type _CharT>
+requires(output_iterator<_OutIt, const _CharT&>) class _LIBCPP_TEMPLATE_VIS
+    __format_buffer {
+  using _Storage =
+      conditional_t<__enable_direct_output<_OutIt, _CharT>,
+                    __direct_storage<_CharT>, __internal_storage<_CharT>>;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it) requires(
+      same_as<_Storage, __internal_storage<_CharT>>)
+      : __output_(__storage_.begin(), __storage_.capacity(), this),
+        __writer_(_VSTD::move(__out_it)) {}
+
+  _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it) requires(
+      same_as<_Storage, __direct_storage<_CharT>>)
+      : __output_(_VSTD::__unwrap_iter(__out_it), size_t(-1), this),
+        __writer_(_VSTD::move(__out_it)) {}
+
+  _LIBCPP_HIDE_FROM_ABI auto make_output_iterator() {
+    return __output_.make_output_iterator();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI void flush(_CharT* __ptr, size_t __size) {
+    __writer_.flush(__ptr, __size);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI _OutIt out() && {
+    __output_.flush();
+    return _VSTD::move(__writer_).out();
+  }
+
+private:
+  _LIBCPP_NO_UNIQUE_ADDRESS _Storage __storage_;
+  __output_buffer<_CharT> __output_;
+  typename __writer_selector<_OutIt, _CharT>::type __writer_;
+};
+} // namespace __format
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_BUFFER_H

diff  --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h
index 058f7a4bee8e1..e712a5ef4381a 100644
--- a/libcxx/include/__format/format_context.h
+++ b/libcxx/include/__format/format_context.h
@@ -12,6 +12,7 @@
 
 #include <__availability>
 #include <__config>
+#include <__format/buffer.h>
 #include <__format/format_args.h>
 #include <__format/format_fwd.h>
 #include <__iterator/back_insert_iterator.h>
@@ -60,16 +61,12 @@ __format_context_create(
 }
 #endif
 
-// TODO FMT Implement [format.context]/4
-// [Note 1: For a given type charT, implementations are encouraged to provide a
-// single instantiation of basic_format_context for appending to
-// basic_string<charT>, vector<charT>, or any other container with contiguous
-// storage by wrapping those in temporary objects with a uniform interface
-// (such as a span<charT>) and polymorphic reallocation. - end note]
-
-using format_context = basic_format_context<back_insert_iterator<string>, char>;
+using format_context =
+    basic_format_context<back_insert_iterator<__format::__output_buffer<char>>,
+                         char>;
 #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
-using wformat_context = basic_format_context<back_insert_iterator<wstring>, wchar_t>;
+using wformat_context = basic_format_context<
+    back_insert_iterator<__format::__output_buffer<wchar_t>>, wchar_t>;
 #endif
 
 template <class _OutIt, class _CharT>

diff  --git a/libcxx/include/format b/libcxx/include/format
index ebb84dcc83ae0..5ff62c3f0fa65 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -125,6 +125,7 @@ namespace std {
 #include <__algorithm/clamp.h>
 #include <__config>
 #include <__debug>
+#include <__format/buffer.h>
 #include <__format/format_arg.h>
 #include <__format/format_args.h>
 #include <__format/format_context.h>
@@ -292,11 +293,12 @@ requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt
         basic_format_parse_context{__fmt, __args.__size()},
         _VSTD::__format_context_create(_VSTD::move(__out_it), __args));
   else {
-    basic_string<_CharT> __str;
+    __format::__format_buffer<_OutIt, _CharT> __buffer{_VSTD::move(__out_it)};
     _VSTD::__format::__vformat_to(
         basic_format_parse_context{__fmt, __args.__size()},
-        _VSTD::__format_context_create(_VSTD::back_inserter(__str), __args));
-    return _VSTD::copy_n(__str.begin(), __str.size(), _VSTD::move(__out_it));
+        _VSTD::__format_context_create(__buffer.make_output_iterator(),
+                                       __args));
+    return _VSTD::move(__buffer).out();
   }
 }
 
@@ -417,12 +419,12 @@ requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt
         _VSTD::__format_context_create(_VSTD::move(__out_it), __args,
                                        _VSTD::move(__loc)));
   else {
-    basic_string<_CharT> __str;
+    __format::__format_buffer<_OutIt, _CharT> __buffer{_VSTD::move(__out_it)};
     _VSTD::__format::__vformat_to(
         basic_format_parse_context{__fmt, __args.__size()},
-        _VSTD::__format_context_create(_VSTD::back_inserter(__str), __args,
-                                       _VSTD::move(__loc)));
-    return _VSTD::copy_n(__str.begin(), __str.size(), _VSTD::move(__out_it));
+        _VSTD::__format_context_create(__buffer.make_output_iterator(),
+                                       __args, _VSTD::move(__loc)));
+    return _VSTD::move(__buffer).out();
   }
 }
 

diff  --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index faf700de533b3..b9d5feade37ee 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -512,6 +512,7 @@ module std [system] {
     export *
 
     module __format {
+      module buffer                   { private header "__format/buffer.h" }
       module format_arg               { private header "__format/format_arg.h" }
       module format_args              { private header "__format/format_args.h" }
       module format_context {

diff  --git a/libcxx/test/libcxx/diagnostics/detail.headers/format/buffer.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/format/buffer.module.verify.cpp
new file mode 100644
index 0000000000000..ef7cb9f0bf06f
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/detail.headers/format/buffer.module.verify.cpp
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: modules-build
+
+// WARNING: This test was generated by 'generate_private_header_tests.py'
+// and should not be edited manually.
+
+// expected-error@*:* {{use of private header from outside its module: '__format/buffer.h'}}
+#include <__format/buffer.h>

diff  --git a/libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp b/libcxx/test/libcxx/utilities/format/format.formatter/format.context/types.compile.pass.cpp
similarity index 88%
rename from libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp
rename to libcxx/test/libcxx/utilities/format/format.formatter/format.context/types.compile.pass.cpp
index aa2224bb0c4e6..159ad5b1ed241 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.formatter/format.context/types.compile.pass.cpp
@@ -24,7 +24,6 @@
 // using wformat_context = basic_format_context<unspecified, wchar_t>;
 
 #include <format>
-#include <string>
 #include <string_view>
 #include <type_traits>
 
@@ -97,13 +96,11 @@ constexpr void test() {
 }
 
 constexpr void test() {
-  test<std::back_insert_iterator<std::basic_string<char>>, char>();
+  test<std::back_insert_iterator<std::__format::__output_buffer<char>>, char>();
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
-  test<std::back_insert_iterator<std::basic_string<wchar_t>>, wchar_t>();
+  test<std::back_insert_iterator<std::__format::__output_buffer<wchar_t>>,
+       wchar_t>();
 #endif
-  test<std::back_insert_iterator<std::basic_string<char8_t>>, char8_t>();
-  test<std::back_insert_iterator<std::basic_string<char16_t>>, char16_t>();
-  test<std::back_insert_iterator<std::basic_string<char32_t>>, char32_t>();
 }
 
 template <class, class>
@@ -112,11 +109,12 @@ template <class It, class CharT>
 constexpr bool is_basic_format_context_specialization<std::basic_format_context<It, CharT>, CharT> = true;
 
 static_assert(is_basic_format_context_specialization<std::format_context, char>);
-LIBCPP_STATIC_ASSERT(
+static_assert(
     std::is_same_v<
         std::format_context,
         std::basic_format_context<
-            std::back_insert_iterator<std::basic_string<char>>, char>>);
+            std::back_insert_iterator<std::__format::__output_buffer<char>>,
+            char>>);
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
 static_assert(is_basic_format_context_specialization<std::wformat_context, wchar_t>);
@@ -124,8 +122,6 @@ LIBCPP_STATIC_ASSERT(
     std::is_same_v<
         std::wformat_context,
         std::basic_format_context<
-            std::back_insert_iterator<std::basic_string<wchar_t>>, wchar_t>>);
+            std::back_insert_iterator< std::__format::__output_buffer<wchar_t>>,
+            wchar_t>>);
 #endif
-
-// Required for MSVC internal test runner compatibility.
-int main(int, char**) { return 0; }


        


More information about the libcxx-commits mailing list