[libcxx-commits] [libcxx] [libc++] Added segmented iterator for std::fill (PR #104680)

via libcxx-commits libcxx-commits at lists.llvm.org
Wed Aug 28 07:02:10 PDT 2024


https://github.com/NoumanAmir657 updated https://github.com/llvm/llvm-project/pull/104680

>From 2782a4cf1b62a7db53d7cb6a3543db0259f4ef5e Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sat, 17 Aug 2024 23:18:52 +0500
Subject: [PATCH 01/15] [libcxx] Added segment iterator for fill

---
 libcxx/include/__algorithm/fill.h | 53 +++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 1ce3eadb013d05..3ba37e7260a4a2 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -21,25 +21,66 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
 
-template <class _ForwardIterator, class _Tp>
+template <
+    class _ForwardIterator,
+    class _Tp,
+    __enable_if_t<
+        is_same<typename iterator_traits<_ForwardIterator>::iterator_category, forward_iterator_tag>::value ||
+            is_same<typename iterator_traits<_ForwardIterator>::iterator_category, bidirectional_iterator_tag>::value,
+        int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, forward_iterator_tag) {
+__fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
   for (; __first != __last; ++__first)
     *__first = __value;
 }
 
-template <class _RandomAccessIterator, class _Tp>
+template <class _RandomAccessIterator,
+          class _Tp,
+          __enable_if_t<(is_same<typename iterator_traits<_RandomAccessIterator>::iterator_category,
+                                 random_access_iterator_tag>::value ||
+                         is_same<typename iterator_traits<_RandomAccessIterator>::iterator_category,
+                                 contiguous_iterator_tag>::value) &&
+                            !__is_segmented_iterator<_RandomAccessIterator>::value,
+                        int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
-__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value, random_access_iterator_tag) {
+__fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) {
   std::fill_n(__first, __last - __first, __value);
 }
 
+template <class _SegmentedIterator,
+          class _Tp,
+          __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+__fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
+  using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+
+  auto __sfirst = _Traits::__segment(__first);
+  auto __slast  = _Traits::__segment(__last);
+
+  // We are in a single segment, so we might not be at the beginning or end
+  if (__sfirst == __slast) {
+    __fill(_Traits::__local(__first), _Traits::__local(__last), __value);
+    return;
+  }
+
+  // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
+  __fill(_Traits::__local(__first), _Traits::__end(__sfirst), __value);
+  ++__sfirst;
+  // iterate over the segments which are guaranteed to be completely in the range
+  while (__sfirst != __slast) {
+    __fill(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __value);
+    ++__sfirst;
+  }
+  // iterate over the last segment
+  __fill(_Traits::__begin(__sfirst), _Traits::__local(__last), __value);
+}
+
 template <class _ForwardIterator, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
-  std::__fill(__first, __last, __value, typename iterator_traits<_ForwardIterator>::iterator_category());
+  std::__fill(__first, __last, __value);
 }
 
 _LIBCPP_END_NAMESPACE_STD
 
-#endif // _LIBCPP___ALGORITHM_FILL_H
+#endif // _LIBCPP___ALGORITHM_FILL_H
\ No newline at end of file

>From 9caf495c7e6e17ffd9da59858f33246f825a0b02 Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sat, 17 Aug 2024 23:37:14 +0500
Subject: [PATCH 02/15] Fixed newline

---
 libcxx/include/__algorithm/fill.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 3ba37e7260a4a2..da2bda4713c373 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -83,4 +83,4 @@ fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
 
 _LIBCPP_END_NAMESPACE_STD
 
-#endif // _LIBCPP___ALGORITHM_FILL_H
\ No newline at end of file
+#endif // _LIBCPP___ALGORITHM_FILL_H

>From 37dab9d3dfc627b8ca7b14ad3475caab2c4f81cd Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sun, 18 Aug 2024 00:57:05 +0500
Subject: [PATCH 03/15] Fixed the logic of templates

---
 libcxx/include/__algorithm/fill.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index da2bda4713c373..b8d893e2ee7339 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -12,6 +12,7 @@
 #include <__algorithm/fill_n.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
+#include <__iterator/segmented_iterator.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -24,10 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <
     class _ForwardIterator,
     class _Tp,
-    __enable_if_t<
-        is_same<typename iterator_traits<_ForwardIterator>::iterator_category, forward_iterator_tag>::value ||
-            is_same<typename iterator_traits<_ForwardIterator>::iterator_category, bidirectional_iterator_tag>::value,
-        int> = 0>
+    __enable_if_t<!__has_random_access_iterator_category<_ForwardIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
   for (; __first != __last; ++__first)
@@ -36,12 +34,8 @@ __fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
 
 template <class _RandomAccessIterator,
           class _Tp,
-          __enable_if_t<(is_same<typename iterator_traits<_RandomAccessIterator>::iterator_category,
-                                 random_access_iterator_tag>::value ||
-                         is_same<typename iterator_traits<_RandomAccessIterator>::iterator_category,
-                                 contiguous_iterator_tag>::value) &&
-                            !__is_segmented_iterator<_RandomAccessIterator>::value,
-                        int> = 0>
+          __enable_if_t<__has_random_access_iterator_category<_RandomAccessIterator>::value &&
+                        !__is_segmented_iterator<_RandomAccessIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) {
   std::fill_n(__first, __last - __first, __value);

>From 8bb83d67e8dd050be8b06d2a1b93327c4e2b0dfb Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sun, 18 Aug 2024 01:42:46 +0500
Subject: [PATCH 04/15] Used std::for_each to remove redundant code

---
 libcxx/include/__algorithm/fill.h | 49 ++++++++++++++++---------------
 1 file changed, 26 insertions(+), 23 deletions(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index b8d893e2ee7339..32439f03dc9cff 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -10,6 +10,7 @@
 #define _LIBCPP___ALGORITHM_FILL_H
 
 #include <__algorithm/fill_n.h>
+#include <__algorithm/for_each.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/segmented_iterator.h>
@@ -22,10 +23,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 // fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
 
-template <
-    class _ForwardIterator,
-    class _Tp,
-    __enable_if_t<!__has_random_access_iterator_category<_ForwardIterator>::value, int> = 0>
+template < class _ForwardIterator,
+           class _Tp,
+           __enable_if_t<!__has_random_access_iterator_category<_ForwardIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
   for (; __first != __last; ++__first)
@@ -35,7 +35,8 @@ __fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
 template <class _RandomAccessIterator,
           class _Tp,
           __enable_if_t<__has_random_access_iterator_category<_RandomAccessIterator>::value &&
-                        !__is_segmented_iterator<_RandomAccessIterator>::value, int> = 0>
+                            !__is_segmented_iterator<_RandomAccessIterator>::value,
+                        int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value) {
   std::fill_n(__first, __last - __first, __value);
@@ -46,27 +47,29 @@ template <class _SegmentedIterator,
           __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
-  using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
+  // using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
 
-  auto __sfirst = _Traits::__segment(__first);
-  auto __slast  = _Traits::__segment(__last);
+  // auto __sfirst = _Traits::__segment(__first);
+  // auto __slast  = _Traits::__segment(__last);
 
-  // We are in a single segment, so we might not be at the beginning or end
-  if (__sfirst == __slast) {
-    __fill(_Traits::__local(__first), _Traits::__local(__last), __value);
-    return;
-  }
+  // // We are in a single segment, so we might not be at the beginning or end
+  // if (__sfirst == __slast) {
+  //   __fill(_Traits::__local(__first), _Traits::__local(__last), __value);
+  //   return;
+  // }
 
-  // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
-  __fill(_Traits::__local(__first), _Traits::__end(__sfirst), __value);
-  ++__sfirst;
-  // iterate over the segments which are guaranteed to be completely in the range
-  while (__sfirst != __slast) {
-    __fill(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __value);
-    ++__sfirst;
-  }
-  // iterate over the last segment
-  __fill(_Traits::__begin(__sfirst), _Traits::__local(__last), __value);
+  // // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
+  // __fill(_Traits::__local(__first), _Traits::__end(__sfirst), __value);
+  // ++__sfirst;
+  // // iterate over the segments which are guaranteed to be completely in the range
+  // while (__sfirst != __slast) {
+  //   __fill(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __value);
+  //   ++__sfirst;
+  // }
+  // // iterate over the last segment
+  // __fill(_Traits::__begin(__sfirst), _Traits::__local(__last), __value);
+
+  std::for_each(__first, __last, [__value](_Tp& val) { val = __value; });
 }
 
 template <class _ForwardIterator, class _Tp>

>From 0e799684c5cbcb0c7ad60f137f9c57c714d9b660 Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sun, 18 Aug 2024 01:53:47 +0500
Subject: [PATCH 05/15] Fixed case

---
 libcxx/include/__algorithm/fill.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 32439f03dc9cff..df9eb6ed16aa23 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -69,7 +69,7 @@ __fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value
   // // iterate over the last segment
   // __fill(_Traits::__begin(__sfirst), _Traits::__local(__last), __value);
 
-  std::for_each(__first, __last, [__value](_Tp& val) { val = __value; });
+  std::for_each(__first, __last, [__value](_Tp& __val) { __val = __value; });
 }
 
 template <class _ForwardIterator, class _Tp>

>From 508659875b460f106310f78ca4c05d88c85e7dcb Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Thu, 22 Aug 2024 16:26:51 +0500
Subject: [PATCH 06/15] Added benchmark

---
 libcxx/include/__algorithm/fill.h             | 22 -------------------
 .../test/benchmarks/algorithms/fill.bench.cpp | 10 +++++++++
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index df9eb6ed16aa23..41f37c46fe471d 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -47,28 +47,6 @@ template <class _SegmentedIterator,
           __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
-  // using _Traits = __segmented_iterator_traits<_SegmentedIterator>;
-
-  // auto __sfirst = _Traits::__segment(__first);
-  // auto __slast  = _Traits::__segment(__last);
-
-  // // We are in a single segment, so we might not be at the beginning or end
-  // if (__sfirst == __slast) {
-  //   __fill(_Traits::__local(__first), _Traits::__local(__last), __value);
-  //   return;
-  // }
-
-  // // We have more than one segment. Iterate over the first segment, since we might not start at the beginning
-  // __fill(_Traits::__local(__first), _Traits::__end(__sfirst), __value);
-  // ++__sfirst;
-  // // iterate over the segments which are guaranteed to be completely in the range
-  // while (__sfirst != __slast) {
-  //   __fill(_Traits::__begin(__sfirst), _Traits::__end(__sfirst), __value);
-  //   ++__sfirst;
-  // }
-  // // iterate over the last segment
-  // __fill(_Traits::__begin(__sfirst), _Traits::__local(__last), __value);
-
   std::for_each(__first, __last, [__value](_Tp& __val) { __val = __value; });
 }
 
diff --git a/libcxx/test/benchmarks/algorithms/fill.bench.cpp b/libcxx/test/benchmarks/algorithms/fill.bench.cpp
index 40f37425c394cf..313fc7f849cbe5 100644
--- a/libcxx/test/benchmarks/algorithms/fill.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/fill.bench.cpp
@@ -9,6 +9,7 @@
 #include <algorithm>
 #include <benchmark/benchmark.h>
 #include <vector>
+#include <deque>
 
 static void bm_fill_n(benchmark::State& state) {
   std::vector<bool> vec1(state.range());
@@ -37,6 +38,15 @@ static void bm_fill(benchmark::State& state) {
 }
 BENCHMARK(bm_fill)->DenseRange(1, 8)->Range(16, 1 << 20);
 
+static void bm_deque_fill(benchmark::State& state) {
+  std::deque<bool> vec1(state.range());
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(vec1);
+    std::fill(vec1.begin(), vec1.end(), false);
+  }
+}
+BENCHMARK(bm_fill)->DenseRange(1, 8)->Range(16, 1 << 20);
+
 static void bm_ranges_fill(benchmark::State& state) {
   std::vector<bool> vec1(state.range());
   for (auto _ : state) {

>From 24fcb1d7f47c38f238e3022f6ed4a2d96793e69c Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sat, 24 Aug 2024 12:00:13 +0500
Subject: [PATCH 07/15] Added call to benchmark

---
 libcxx/include/__algorithm/fill.h                | 1 +
 libcxx/test/benchmarks/algorithms/fill.bench.cpp | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 41f37c46fe471d..954814ff4c00d1 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -59,3 +59,4 @@ fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___ALGORITHM_FILL_H
+
diff --git a/libcxx/test/benchmarks/algorithms/fill.bench.cpp b/libcxx/test/benchmarks/algorithms/fill.bench.cpp
index 313fc7f849cbe5..ccfb866c147b12 100644
--- a/libcxx/test/benchmarks/algorithms/fill.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/fill.bench.cpp
@@ -45,7 +45,7 @@ static void bm_deque_fill(benchmark::State& state) {
     std::fill(vec1.begin(), vec1.end(), false);
   }
 }
-BENCHMARK(bm_fill)->DenseRange(1, 8)->Range(16, 1 << 20);
+BENCHMARK(bm_deque_fill)->DenseRange(1, 8)->Range(16, 1 << 20);
 
 static void bm_ranges_fill(benchmark::State& state) {
   std::vector<bool> vec1(state.range());

>From 668139820e5f943912e1c7698d6619182aa733b0 Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Sat, 24 Aug 2024 12:24:48 +0500
Subject: [PATCH 08/15] Added release note

---
 libcxx/docs/ReleaseNotes/20.rst   | 3 +++
 libcxx/include/__algorithm/fill.h | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/libcxx/docs/ReleaseNotes/20.rst b/libcxx/docs/ReleaseNotes/20.rst
index fe9f4c1973cdb4..f7e82d7b932cca 100644
--- a/libcxx/docs/ReleaseNotes/20.rst
+++ b/libcxx/docs/ReleaseNotes/20.rst
@@ -46,6 +46,9 @@ Improvements and New Features
 
 - The ``lexicographical_compare`` and ``ranges::lexicographical_compare`` algorithms have been optimized for trivially
   equality comparable types, resulting in a performance improvement of up to 40x.
+  
+- ``std::fill`` has been optimized for segmented iterators like ``std::deque::iterator`` in C++23 and
+  later, which can lead to performance improvements of up to 40x.
 
 
 Deprecations and Removals
diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 954814ff4c00d1..41f37c46fe471d 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -59,4 +59,3 @@ fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
 _LIBCPP_END_NAMESPACE_STD
 
 #endif // _LIBCPP___ALGORITHM_FILL_H
-

>From 47a770f6dfdd572fb7d16399a1088d9e1f35c1bb Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Mon, 26 Aug 2024 09:57:13 +0500
Subject: [PATCH 09/15] Added tests

---
 .../alg.modifying.operations/alg.fill/fill.pass.cpp    | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
index 481d565961b2b5..86cc7d46d15f65 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill.pass.cpp
@@ -17,6 +17,7 @@
 #include <array>
 #include <cassert>
 #include <vector>
+#include <deque>
 
 #include "test_macros.h"
 #include "test_iterators.h"
@@ -101,5 +102,14 @@ int main(int, char**) {
   static_assert(test());
 #endif
 
+  // check that segmented iterators work properly
+  int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049};
+  for (const int size : sizes) {
+    std::deque<bool> in(size, false);
+    std::deque<bool> expected(size, true);
+    std::fill(in.begin(), in.end(), true);
+    assert(in == expected);
+  }
+
   return 0;
 }

>From e60ee44f03e5c5584322fac07b9646b6be7baaaa Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Mon, 26 Aug 2024 11:08:05 +0500
Subject: [PATCH 10/15] Updated transitive includes

---
 libcxx/include/__algorithm/fill.h             |  1 +
 .../test/libcxx/transitive_includes/cxx03.csv | 21 ++-----------------
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 41f37c46fe471d..93aa980d8f12dc 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -14,6 +14,7 @@
 #include <__config>
 #include <__iterator/iterator_traits.h>
 #include <__iterator/segmented_iterator.h>
+#include <__type_traits/enable_if.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv
index fd2cd7f1c2d961..f4c88f7a3bf7ad 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx03.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv
@@ -86,7 +86,6 @@ bit version
 bitset climits
 bitset concepts
 bitset cstddef
-bitset cstdint
 bitset cstdlib
 bitset cstring
 bitset cwchar
@@ -94,6 +93,7 @@ bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -250,21 +250,11 @@ experimental/simd limits
 experimental/utility utility
 filesystem compare
 filesystem concepts
-filesystem cstddef
-filesystem cstdint
 filesystem cstdlib
 filesystem cstring
-filesystem ctime
-filesystem iomanip
 filesystem iosfwd
-filesystem limits
-filesystem locale
 filesystem new
-filesystem ratio
-filesystem string
-filesystem string_view
 filesystem system_error
-filesystem type_traits
 filesystem version
 format array
 format cctype
@@ -798,15 +788,7 @@ stack version
 stdexcept cstdlib
 stdexcept exception
 stdexcept iosfwd
-stop_token atomic
-stop_token cstddef
-stop_token cstdint
-stop_token cstring
-stop_token ctime
 stop_token iosfwd
-stop_token limits
-stop_token ratio
-stop_token type_traits
 stop_token version
 streambuf cctype
 streambuf climits
@@ -988,6 +970,7 @@ valarray functional
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray stdexcept
 valarray type_traits
 valarray version

>From 746b58a5cbf00f48532682ade4f7a44d74aa3bdd Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Mon, 26 Aug 2024 11:31:16 +0500
Subject: [PATCH 11/15] Fixed transitive includes

---
 .../test/libcxx/transitive_includes/cxx11.csv | 21 ++---------
 .../test/libcxx/transitive_includes/cxx14.csv | 36 +++++++------------
 .../test/libcxx/transitive_includes/cxx17.csv | 23 ++++++------
 .../test/libcxx/transitive_includes/cxx20.csv |  3 +-
 .../test/libcxx/transitive_includes/cxx23.csv |  3 +-
 .../test/libcxx/transitive_includes/cxx26.csv |  3 +-
 6 files changed, 33 insertions(+), 56 deletions(-)

diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv
index 04122fd0f4571a..e75a033838749d 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx11.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv
@@ -86,7 +86,6 @@ bit version
 bitset climits
 bitset concepts
 bitset cstddef
-bitset cstdint
 bitset cstdlib
 bitset cstring
 bitset cwchar
@@ -94,6 +93,7 @@ bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -251,21 +251,11 @@ experimental/simd limits
 experimental/utility utility
 filesystem compare
 filesystem concepts
-filesystem cstddef
-filesystem cstdint
 filesystem cstdlib
 filesystem cstring
-filesystem ctime
-filesystem iomanip
 filesystem iosfwd
-filesystem limits
-filesystem locale
 filesystem new
-filesystem ratio
-filesystem string
-filesystem string_view
 filesystem system_error
-filesystem type_traits
 filesystem version
 format array
 format cctype
@@ -804,15 +794,7 @@ stack version
 stdexcept cstdlib
 stdexcept exception
 stdexcept iosfwd
-stop_token atomic
-stop_token cstddef
-stop_token cstdint
-stop_token cstring
-stop_token ctime
 stop_token iosfwd
-stop_token limits
-stop_token ratio
-stop_token type_traits
 stop_token version
 streambuf cctype
 streambuf climits
@@ -995,6 +977,7 @@ valarray functional
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray stdexcept
 valarray type_traits
 valarray version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 42c742bead0c1f..6991f3ae3238e1 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -7,7 +7,6 @@ algorithm cstdint
 algorithm cstdlib
 algorithm cstring
 algorithm cwchar
-algorithm execution
 algorithm initializer_list
 algorithm iosfwd
 algorithm iterator
@@ -16,11 +15,11 @@ algorithm memory
 algorithm new
 algorithm optional
 algorithm stdexcept
+algorithm tuple
 algorithm type_traits
 algorithm utility
 algorithm version
 any atomic
-any chrono
 any concepts
 any cstddef
 any cstdint
@@ -87,7 +86,6 @@ bit version
 bitset climits
 bitset concepts
 bitset cstddef
-bitset cstdint
 bitset cstdlib
 bitset cstring
 bitset cwchar
@@ -95,6 +93,7 @@ bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -115,15 +114,21 @@ charconv new
 charconv type_traits
 charconv version
 chrono bit
+chrono charconv
+chrono cmath
 chrono compare
 chrono concepts
 chrono cstddef
 chrono cstdint
 chrono cstring
 chrono ctime
+chrono format
 chrono forward_list
 chrono limits
+chrono locale
+chrono ostream
 chrono ratio
+chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
@@ -194,6 +199,9 @@ condition_variable type_traits
 condition_variable typeinfo
 condition_variable version
 coroutine compare
+coroutine cstddef
+coroutine cstdint
+coroutine cstring
 coroutine iosfwd
 coroutine limits
 coroutine type_traits
@@ -359,7 +367,6 @@ functional vector
 functional version
 future atomic
 future cerrno
-future chrono
 future cstddef
 future cstdint
 future cstdlib
@@ -589,6 +596,7 @@ numeric iterator
 numeric limits
 numeric new
 numeric optional
+numeric tuple
 numeric type_traits
 numeric version
 optional atomic
@@ -645,18 +653,11 @@ print stdexcept
 print string
 print string_view
 print version
-queue compare
-queue concepts
 queue cstddef
-queue cstdlib
 queue deque
-queue functional
 queue initializer_list
 queue limits
 queue new
-queue type_traits
-queue vector
-queue version
 random algorithm
 random climits
 random cmath
@@ -754,16 +755,12 @@ set stdexcept
 set tuple
 set type_traits
 set version
-shared_mutex cerrno
 shared_mutex cstddef
 shared_mutex ctime
 shared_mutex limits
 shared_mutex ratio
-shared_mutex stdexcept
 shared_mutex string
-shared_mutex system_error
 shared_mutex type_traits
-shared_mutex version
 source_location cstdint
 source_location version
 span array
@@ -807,15 +804,7 @@ stack version
 stdexcept cstdlib
 stdexcept exception
 stdexcept iosfwd
-stop_token atomic
-stop_token cstddef
-stop_token cstdint
-stop_token cstring
-stop_token ctime
 stop_token iosfwd
-stop_token limits
-stop_token ratio
-stop_token type_traits
 stop_token version
 streambuf cctype
 streambuf climits
@@ -998,6 +987,7 @@ valarray functional
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray stdexcept
 valarray type_traits
 valarray version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index bc0659127d4bf5..acfec9ec537b74 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -20,7 +20,6 @@ algorithm type_traits
 algorithm utility
 algorithm version
 any atomic
-any chrono
 any concepts
 any cstddef
 any cstdint
@@ -87,7 +86,6 @@ bit version
 bitset climits
 bitset concepts
 bitset cstddef
-bitset cstdint
 bitset cstdlib
 bitset cstring
 bitset cwchar
@@ -95,6 +93,7 @@ bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -115,15 +114,21 @@ charconv new
 charconv type_traits
 charconv version
 chrono bit
+chrono charconv
+chrono cmath
 chrono compare
 chrono concepts
 chrono cstddef
 chrono cstdint
 chrono cstring
 chrono ctime
+chrono format
 chrono forward_list
 chrono limits
+chrono locale
+chrono ostream
 chrono ratio
+chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
@@ -194,6 +199,9 @@ condition_variable type_traits
 condition_variable typeinfo
 condition_variable version
 coroutine compare
+coroutine cstddef
+coroutine cstdint
+coroutine cstring
 coroutine iosfwd
 coroutine limits
 coroutine type_traits
@@ -359,7 +367,6 @@ functional vector
 functional version
 future atomic
 future cerrno
-future chrono
 future cstddef
 future cstdint
 future cstdlib
@@ -684,6 +691,7 @@ ranges optional
 ranges span
 ranges tuple
 ranges type_traits
+ranges variant
 ranges version
 ratio climits
 ratio cstdint
@@ -886,15 +894,7 @@ syncstream shared_mutex
 syncstream streambuf
 syncstream string
 system_error cerrno
-system_error compare
-system_error cstddef
-system_error cstdint
-system_error cstring
-system_error limits
-system_error stdexcept
 system_error string
-system_error type_traits
-system_error version
 thread array
 thread atomic
 thread cctype
@@ -999,6 +999,7 @@ valarray functional
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray stdexcept
 valarray type_traits
 valarray version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv
index fed0944f0219c4..cfbd3e38cd2553 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx20.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv
@@ -86,7 +86,6 @@ bit version
 bitset climits
 bitset concepts
 bitset cstddef
-bitset cstdint
 bitset cstdlib
 bitset cstring
 bitset cwchar
@@ -94,6 +93,7 @@ bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -1006,6 +1006,7 @@ valarray functional
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray stdexcept
 valarray type_traits
 valarray version
diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv
index 53f99384a7f573..b957bd9b209fd3 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx23.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv
@@ -49,13 +49,13 @@ bit limits
 bit version
 bitset climits
 bitset cstddef
-bitset cstdint
 bitset cstring
 bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -691,6 +691,7 @@ valarray cstdint
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray version
 variant compare
 variant cstddef
diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv
index 53f99384a7f573..b957bd9b209fd3 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx26.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv
@@ -49,13 +49,13 @@ bit limits
 bit version
 bitset climits
 bitset cstddef
-bitset cstdint
 bitset cstring
 bitset cwchar
 bitset initializer_list
 bitset iosfwd
 bitset limits
 bitset new
+bitset optional
 bitset stdexcept
 bitset string
 bitset string_view
@@ -691,6 +691,7 @@ valarray cstdint
 valarray initializer_list
 valarray limits
 valarray new
+valarray optional
 valarray version
 variant compare
 variant cstddef

>From 1ed8fe54fa97d755db035852d06f4828547817ec Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Mon, 26 Aug 2024 12:37:02 +0500
Subject: [PATCH 12/15] Updated transitive includes

---
 .../test/libcxx/transitive_includes/cxx14.csv | 35 ++++++++-----------
 .../test/libcxx/transitive_includes/cxx17.csv | 28 ++++++---------
 2 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv
index 6991f3ae3238e1..c87cec73db5b6f 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx14.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv
@@ -7,6 +7,7 @@ algorithm cstdint
 algorithm cstdlib
 algorithm cstring
 algorithm cwchar
+algorithm execution
 algorithm initializer_list
 algorithm iosfwd
 algorithm iterator
@@ -15,11 +16,11 @@ algorithm memory
 algorithm new
 algorithm optional
 algorithm stdexcept
-algorithm tuple
 algorithm type_traits
 algorithm utility
 algorithm version
 any atomic
+any chrono
 any concepts
 any cstddef
 any cstdint
@@ -114,21 +115,15 @@ charconv new
 charconv type_traits
 charconv version
 chrono bit
-chrono charconv
-chrono cmath
 chrono compare
 chrono concepts
 chrono cstddef
 chrono cstdint
 chrono cstring
 chrono ctime
-chrono format
 chrono forward_list
 chrono limits
-chrono locale
-chrono ostream
 chrono ratio
-chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
@@ -199,9 +194,6 @@ condition_variable type_traits
 condition_variable typeinfo
 condition_variable version
 coroutine compare
-coroutine cstddef
-coroutine cstdint
-coroutine cstring
 coroutine iosfwd
 coroutine limits
 coroutine type_traits
@@ -262,21 +254,11 @@ experimental/type_traits type_traits
 experimental/utility utility
 filesystem compare
 filesystem concepts
-filesystem cstddef
-filesystem cstdint
 filesystem cstdlib
 filesystem cstring
-filesystem ctime
-filesystem iomanip
 filesystem iosfwd
-filesystem limits
-filesystem locale
 filesystem new
-filesystem ratio
-filesystem string
-filesystem string_view
 filesystem system_error
-filesystem type_traits
 filesystem version
 format array
 format cctype
@@ -367,6 +349,7 @@ functional vector
 functional version
 future atomic
 future cerrno
+future chrono
 future cstddef
 future cstdint
 future cstdlib
@@ -596,7 +579,6 @@ numeric iterator
 numeric limits
 numeric new
 numeric optional
-numeric tuple
 numeric type_traits
 numeric version
 optional atomic
@@ -653,11 +635,18 @@ print stdexcept
 print string
 print string_view
 print version
+queue compare
+queue concepts
 queue cstddef
+queue cstdlib
 queue deque
+queue functional
 queue initializer_list
 queue limits
 queue new
+queue type_traits
+queue vector
+queue version
 random algorithm
 random climits
 random cmath
@@ -755,12 +744,16 @@ set stdexcept
 set tuple
 set type_traits
 set version
+shared_mutex cerrno
 shared_mutex cstddef
 shared_mutex ctime
 shared_mutex limits
 shared_mutex ratio
+shared_mutex stdexcept
 shared_mutex string
+shared_mutex system_error
 shared_mutex type_traits
+shared_mutex version
 source_location cstdint
 source_location version
 span array
diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv
index acfec9ec537b74..840c777bbf7b05 100644
--- a/libcxx/test/libcxx/transitive_includes/cxx17.csv
+++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv
@@ -20,6 +20,7 @@ algorithm type_traits
 algorithm utility
 algorithm version
 any atomic
+any chrono
 any concepts
 any cstddef
 any cstdint
@@ -114,21 +115,15 @@ charconv new
 charconv type_traits
 charconv version
 chrono bit
-chrono charconv
-chrono cmath
 chrono compare
 chrono concepts
 chrono cstddef
 chrono cstdint
 chrono cstring
 chrono ctime
-chrono format
 chrono forward_list
 chrono limits
-chrono locale
-chrono ostream
 chrono ratio
-chrono sstream
 chrono stdexcept
 chrono string
 chrono string_view
@@ -199,9 +194,6 @@ condition_variable type_traits
 condition_variable typeinfo
 condition_variable version
 coroutine compare
-coroutine cstddef
-coroutine cstdint
-coroutine cstring
 coroutine iosfwd
 coroutine limits
 coroutine type_traits
@@ -367,6 +359,7 @@ functional vector
 functional version
 future atomic
 future cerrno
+future chrono
 future cstddef
 future cstdint
 future cstdlib
@@ -691,7 +684,6 @@ ranges optional
 ranges span
 ranges tuple
 ranges type_traits
-ranges variant
 ranges version
 ratio climits
 ratio cstdint
@@ -816,15 +808,7 @@ stack version
 stdexcept cstdlib
 stdexcept exception
 stdexcept iosfwd
-stop_token atomic
-stop_token cstddef
-stop_token cstdint
-stop_token cstring
-stop_token ctime
 stop_token iosfwd
-stop_token limits
-stop_token ratio
-stop_token type_traits
 stop_token version
 streambuf cctype
 streambuf climits
@@ -894,7 +878,15 @@ syncstream shared_mutex
 syncstream streambuf
 syncstream string
 system_error cerrno
+system_error compare
+system_error cstddef
+system_error cstdint
+system_error cstring
+system_error limits
+system_error stdexcept
 system_error string
+system_error type_traits
+system_error version
 thread array
 thread atomic
 thread cctype

>From 6d3fc3dee423a9fa5ce90a2a25af2f0f1aa37508 Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Mon, 26 Aug 2024 16:38:29 +0500
Subject: [PATCH 13/15] Added functor

---
 libcxx/include/__algorithm/fill.h | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 93aa980d8f12dc..52f7bd7b15bae2 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -43,12 +43,21 @@ __fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& _
   std::fill_n(__first, __last - __first, __value);
 }
 
+template <class _Tp>
+struct __fill_segment {
+  const _Tp& __value_;
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __fill_segment(const _Tp& __value) : __value_(__value) {}
+
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR void operator()(_Tp& __val) const { __val = __value_; }
+};
+
 template <class _SegmentedIterator,
           class _Tp,
           __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
-  std::for_each(__first, __last, [__value](_Tp& __val) { __val = __value; });
+  std::for_each(__first, __last, __fill_segment<_Tp>(__value));
 }
 
 template <class _ForwardIterator, class _Tp>

>From 7fa773e17c3b44e5613a803abf5ae2a0c16136ff Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Mon, 26 Aug 2024 18:04:35 +0500
Subject: [PATCH 14/15] Fixed

---
 libcxx/include/__algorithm/fill.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 52f7bd7b15bae2..405b1dfb419600 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -49,7 +49,7 @@ struct __fill_segment {
 
   _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __fill_segment(const _Tp& __value) : __value_(__value) {}
 
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR void operator()(_Tp& __val) const { __val = __value_; }
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void operator()(_Tp& __val) const { __val = __value_; }
 };
 
 template <class _SegmentedIterator,

>From 8f0605f8aa52e113490275d71e4acdedfff51546 Mon Sep 17 00:00:00 2001
From: nouman-10x <noumanamir453 at gmail.com>
Date: Wed, 28 Aug 2024 18:44:31 +0500
Subject: [PATCH 15/15] Optimized fill_n

---
 libcxx/include/__algorithm/fill.h             | 15 ++++----------
 libcxx/include/__algorithm/fill_n.h           | 20 +++++++++++++++++--
 .../test/benchmarks/algorithms/fill.bench.cpp |  9 +++++++++
 .../alg.fill/fill_n.pass.cpp                  | 13 ++++++++++++
 4 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/libcxx/include/__algorithm/fill.h b/libcxx/include/__algorithm/fill.h
index 405b1dfb419600..fb84c6b78f6d46 100644
--- a/libcxx/include/__algorithm/fill.h
+++ b/libcxx/include/__algorithm/fill.h
@@ -9,7 +9,6 @@
 #ifndef _LIBCPP___ALGORITHM_FILL_H
 #define _LIBCPP___ALGORITHM_FILL_H
 
-#include <__algorithm/fill_n.h>
 #include <__algorithm/for_each.h>
 #include <__config>
 #include <__iterator/iterator_traits.h>
@@ -23,6 +22,9 @@
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 // fill isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
+template <class _OutputIterator, class _Size, class _Tp>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
 
 template < class _ForwardIterator,
            class _Tp,
@@ -43,21 +45,17 @@ __fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& _
   std::fill_n(__first, __last - __first, __value);
 }
 
-template <class _Tp>
-struct __fill_segment {
-  const _Tp& __value_;
-
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __fill_segment(const _Tp& __value) : __value_(__value) {}
-
-  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void operator()(_Tp& __val) const { __val = __value_; }
-};
-
+// Segmented-iterator overload of __fill: assigns __value to every element of
+// [__first, __last) through std::for_each.
 template <class _SegmentedIterator,
           class _Tp,
           __enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 __fill(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value) {
-  std::for_each(__first, __last, __fill_segment<_Tp>(__value));
+  // Take each element by the iterator's own reference type (the element type need not be _Tp)
+  // and capture __value by reference so that filling never copies it.
+  std::for_each(
+      __first, __last, [&__value](typename iterator_traits<_SegmentedIterator>::reference __val) { __val = __value; });
 }
 
 template <class _ForwardIterator, class _Tp>
diff --git a/libcxx/include/__algorithm/fill_n.h b/libcxx/include/__algorithm/fill_n.h
index f29633f88087f0..817a5578786246 100644
--- a/libcxx/include/__algorithm/fill_n.h
+++ b/libcxx/include/__algorithm/fill_n.h
@@ -15,6 +15,8 @@
 #include <__iterator/iterator_traits.h>
 #include <__memory/pointer_traits.h>
 #include <__utility/convert_to_integral.h>
+#include <__iterator/segmented_iterator.h>
+#include <__type_traits/enable_if.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -26,8 +28,15 @@ _LIBCPP_PUSH_MACROS
 _LIBCPP_BEGIN_NAMESPACE_STD
 
 // fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
+template <class _ForwardIterator, class _Tp>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
+fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value);
 
-template <class _OutputIterator, class _Size, class _Tp>
+template <class _OutputIterator, class _Size, class _Tp, __enable_if_t<!__is_segmented_iterator<_OutputIterator>::value, int> = 0>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
+
+template <class _OutputIterator, class _Size, class _Tp, __enable_if_t<__is_segmented_iterator<_OutputIterator>::value, int> = 0>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
 
@@ -77,7 +86,7 @@ __fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) {
   return __first + __n;
 }
 
-template <class _OutputIterator, class _Size, class _Tp>
+template <class _OutputIterator, class _Size, class _Tp, __enable_if_t<!__is_segmented_iterator<_OutputIterator>::value, int>>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
   for (; __n > 0; ++__first, (void)--__n)
@@ -85,6 +94,22 @@ __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
   return __first;
 }
 
+// Segmented-iterator overload of __fill_n: converts the count into the range
+// [__first, __first + __n) and forwards it to std::fill, returning the end of that range.
+// A count that is not positive writes nothing and returns __first unchanged.
+// NOTE(review): `__first + __n` presumes the segmented iterator is also random access -- confirm.
+template <class _OutputIterator, class _Size, class _Tp, __enable_if_t<__is_segmented_iterator<_OutputIterator>::value, int>>
+inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
+__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
+  if (__n > 0) {
+    _OutputIterator __last = __first + __n;
+    std::fill(__first, __last, __value);
+    return __last;
+  }
+  // Never hand std::fill a range whose end precedes its start.
+  return __first;
+}
+
 template <class _OutputIterator, class _Size, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
 fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
diff --git a/libcxx/test/benchmarks/algorithms/fill.bench.cpp b/libcxx/test/benchmarks/algorithms/fill.bench.cpp
index ccfb866c147b12..6b65755e3ce9cd 100644
--- a/libcxx/test/benchmarks/algorithms/fill.bench.cpp
+++ b/libcxx/test/benchmarks/algorithms/fill.bench.cpp
@@ -20,6 +20,15 @@ static void bm_fill_n(benchmark::State& state) {
 }
 BENCHMARK(bm_fill_n)->DenseRange(1, 8)->Range(16, 1 << 20);
 
+static void bm_deque_fill_n(benchmark::State& state) { // std::fill_n over a deque of state.range() bools
+  std::deque<bool> dq(state.range());
+  for (auto _ : state) {
+    benchmark::DoNotOptimize(dq);
+    benchmark::DoNotOptimize(std::fill_n(dq.begin(), dq.size(), false));
+  }
+}
+BENCHMARK(bm_deque_fill_n)->DenseRange(1, 8)->Range(16, 1 << 20);
+
 static void bm_ranges_fill_n(benchmark::State& state) {
   std::vector<bool> vec1(state.range());
   for (auto _ : state) {
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
index 7d6770de702bf3..ef4a666834b2e4 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp
@@ -15,6 +15,7 @@
 
 #include <algorithm>
 #include <cassert>
+#include <deque>
 
 #include "test_macros.h"
 #include "test_iterators.h"
@@ -144,6 +145,16 @@ void test6()
   std::fill_n(&foo[0], UDI(5), Storage());
 }
 
+void test_deque() {
+    // Use enough elements that the deque is expected to span several internal blocks.
+    std::deque<int> dq(5000, 1);
+    std::deque<int>::iterator mid = std::fill_n(dq.begin(), 3000, 4);
+    assert(mid == dq.begin() + 3000);               // returns one past the last element written
+    assert(std::count(dq.begin(), mid, 4) == 3000); // every element in [begin, mid) was overwritten
+    assert(std::count(mid, dq.end(), 1) == 2000);   // nothing at or after mid was touched
+    assert(std::fill_n(dq.begin(), 0, 9) == dq.begin() && dq.front() == 4); // n == 0 writes nothing
+}
+
 
 int main(int, char**)
 {
@@ -166,6 +177,8 @@ int main(int, char**)
     test5();
     test6();
 
+    test_deque();
+
 #if TEST_STD_VER > 17
     static_assert(test_constexpr());
 #endif



More information about the libcxx-commits mailing list