[libcxx-commits] [libcxx] [libc++] Optimize std::replace and std::replace_if for segmented iterators (PR #199364)

Aryan Naraghi via libcxx-commits libcxx-commits at lists.llvm.org
Sat May 23 10:56:23 PDT 2026


https://github.com/aryann created https://github.com/llvm/llvm-project/pull/199364

Improves the performance of `std::replace` and `std::replace_if` for segmented iterators.

The performance improvement is achieved by reimplementing these two functions in terms of `std::__for_each` which is specialized for segmented iterators (the idea to use `std::__for_each` where possible was proposed in #198375). Note that `std::replace` is now implemented in terms of `std::replace_if` (this idea was proposed in #102817).

This PR is a subtask of #102817.

# Improvement Summary

| Benchmark | Before (naive loop) | After (`std::__for_each`) | Speedup |
|---|---:|---:|---:|
| std::replace(vector\<int>) (prefix)/32 | 6.41 ns | 6.36 ns | 1.01X |
| std::replace(vector\<int>) (prefix)/50 | 10.2 ns | 10.1 ns | 1.01X |
| std::replace(vector\<int>) (prefix)/1024 | 189 ns | 184 ns | 1.03X |
| std::replace(vector\<int>) (prefix)/8192 | 1603 ns | 1483 ns | 1.08X |
| **std::replace(deque\<int>) (prefix)/32** | **21.2 ns** | **6.59 ns** | 🟢 **3.22X** |
| **std::replace(deque\<int>) (prefix)/50** | **28.4 ns** | **10.4 ns** | 🟢 **2.73X** |
| **std::replace(deque\<int>) (prefix)/1024** | **554 ns** | **209 ns** | 🟢 **2.65X** |
| **std::replace(deque\<int>) (prefix)/8192** | **4355 ns** | **1512 ns** | 🟢 **2.88X** |
| std::replace(list\<int>) (prefix)/32 | 16.4 ns | 16.4 ns | 1.00X |
| std::replace(list\<int>) (prefix)/50 | 30.2 ns | 30.1 ns | 1.00X |
| std::replace(list\<int>) (prefix)/1024 | 1148 ns | 996 ns | 1.15X |
| std::replace(list\<int>) (prefix)/8192 | 10206 ns | 10266 ns | 0.99X |
| std::replace_if(vector\<int>) (prefix)/32 | 15.2 ns | 15.3 ns | 0.99X |
| std::replace_if(vector\<int>) (prefix)/50 | 22.1 ns | 20.7 ns | 1.07X |
| std::replace_if(vector\<int>) (prefix)/1024 | 421 ns | 422 ns | 1.00X |
| std::replace_if(vector\<int>) (prefix)/8192 | 3097 ns | 2980 ns | 1.04X |
| **std::replace_if(deque\<int>) (prefix)/32** | **22.6 ns** | **15.5 ns** | 🟢 **1.46X** |
| **std::replace_if(deque\<int>) (prefix)/50** | **31.5 ns** | **20.9 ns** | 🟢 **1.51X** |
| **std::replace_if(deque\<int>) (prefix)/1024** | **562 ns** | **395 ns** | 🟢 **1.42X** |
| **std::replace_if(deque\<int>) (prefix)/8192** | **4386 ns** | **3035 ns** | 🟢 **1.45X** |
| std::replace_if(list\<int>) (prefix)/32 | 26.4 ns | 25.3 ns | 1.04X |
| std::replace_if(list\<int>) (prefix)/50 | 44.1 ns | 42.9 ns | 1.03X |
| std::replace_if(list\<int>) (prefix)/1024 | 1021 ns | 940 ns | 1.09X |
| std::replace_if(list\<int>) (prefix)/8192 | 10041 ns | 9969 ns | 1.01X |
| std::replace(vector\<int>) (sprinkled)/32 | 11.8 ns | 11.6 ns | 1.02X |
| std::replace(vector\<int>) (sprinkled)/50 | 18.4 ns | 18.3 ns | 1.01X |
| std::replace(vector\<int>) (sprinkled)/1024 | 311 ns | 313 ns | 0.99X |
| std::replace(vector\<int>) (sprinkled)/8192 | 2464 ns | 2777 ns | 0.89X |
| **std::replace(deque\<int>) (sprinkled)/32** | **25.4 ns** | **12.4 ns** | 🟢 **2.05X** |
| **std::replace(deque\<int>) (sprinkled)/50** | **38.9 ns** | **18.9 ns** | 🟢 **2.06X** |
| **std::replace(deque\<int>) (sprinkled)/1024** | **774 ns** | **313 ns** | 🟢 **2.47X** |
| **std::replace(deque\<int>) (sprinkled)/8192** | **6172 ns** | **2481 ns** | 🟢 **2.49X** |
| std::replace(list\<int>) (sprinkled)/32 | 21.8 ns | 20.0 ns | 1.09X |
| std::replace(list\<int>) (sprinkled)/50 | 35.5 ns | 34.3 ns | 1.03X |
| std::replace(list\<int>) (sprinkled)/1024 | 978 ns | 972 ns | 1.01X |
| std::replace(list\<int>) (sprinkled)/8192 | 10631 ns | 10372 ns | 1.02X |
| std::replace_if(vector\<int>) (sprinkled)/32 | 20.1 ns | 30.4 ns | 0.66X |
| std::replace_if(vector\<int>) (sprinkled)/50 | 30.8 ns | 30.9 ns | 1.00X |
| std::replace_if(vector\<int>) (sprinkled)/1024 | 620 ns | 614 ns | 1.01X |
| std::replace_if(vector\<int>) (sprinkled)/8192 | 4911 ns | 4905 ns | 1.00X |
| **std::replace_if(deque\<int>) (sprinkled)/32** | **25.3 ns** | **20.5 ns** | 🟢 **1.23X** |
| **std::replace_if(deque\<int>) (sprinkled)/50** | **38.9 ns** | **31.3 ns** | 🟢 **1.24X** |
| **std::replace_if(deque\<int>) (sprinkled)/1024** | **774 ns** | **620 ns** | 🟢 **1.25X** |
| **std::replace_if(deque\<int>) (sprinkled)/8192** | **6491 ns** | **4937 ns** | 🟢 **1.31X** |
| std::replace_if(list\<int>) (sprinkled)/32 | 26.9 ns | 25.8 ns | 1.04X |
| std::replace_if(list\<int>) (sprinkled)/50 | 44.4 ns | 43.9 ns | 1.01X |
| std::replace_if(list\<int>) (sprinkled)/1024 | 1043 ns | 1135 ns | 0.92X |
| std::replace_if(list\<int>) (sprinkled)/8192 | 10941 ns | 10948 ns | 1.00X |

# Raw Data

## `libcxx/test/benchmarks/algorithms/modifying/replace.bench.cpp`

### Before

```
# | ----------------------------------------------------------------------------------------
# | Benchmark                                              Time             CPU   Iterations
# | ----------------------------------------------------------------------------------------
# | std::replace(vector<int>) (prefix)/32               6.41 ns         6.39 ns    107958051
# | std::replace(vector<int>) (prefix)/50               10.2 ns         10.2 ns     68465684
# | std::replace(vector<int>) (prefix)/1024              189 ns          188 ns      3547375
# | std::replace(vector<int>) (prefix)/8192             1603 ns         1531 ns       462850
# | std::replace(deque<int>) (prefix)/32                21.2 ns         21.2 ns     33579423
# | std::replace(deque<int>) (prefix)/50                28.4 ns         28.4 ns     24105015
# | std::replace(deque<int>) (prefix)/1024               554 ns          554 ns      1261057
# | std::replace(deque<int>) (prefix)/8192              4355 ns         4355 ns       160651
# | std::replace(list<int>) (prefix)/32                 16.4 ns         16.4 ns     42930036
# | std::replace(list<int>) (prefix)/50                 30.2 ns         30.2 ns     23241143
# | std::replace(list<int>) (prefix)/1024               1148 ns         1060 ns       775022
# | std::replace(list<int>) (prefix)/8192              10206 ns        10139 ns        69798
# | std::replace_if(vector<int>) (prefix)/32            15.2 ns         15.2 ns     46224758
# | std::replace_if(vector<int>) (prefix)/50            22.1 ns         21.3 ns     33588284
# | std::replace_if(vector<int>) (prefix)/1024           421 ns          420 ns      1657083
# | std::replace_if(vector<int>) (prefix)/8192          3097 ns         3013 ns       237578
# | std::replace_if(deque<int>) (prefix)/32             22.6 ns         22.2 ns     32460005
# | std::replace_if(deque<int>) (prefix)/50             31.5 ns         31.5 ns     23215861
# | std::replace_if(deque<int>) (prefix)/1024            562 ns          562 ns      1242501
# | std::replace_if(deque<int>) (prefix)/8192           4386 ns         4381 ns       159525
# | std::replace_if(list<int>) (prefix)/32              26.4 ns         25.8 ns     26716945
# | std::replace_if(list<int>) (prefix)/50              44.1 ns         43.4 ns     16366995
# | std::replace_if(list<int>) (prefix)/1024            1021 ns         1019 ns       681922
# | std::replace_if(list<int>) (prefix)/8192           10041 ns        10034 ns        66707
# | std::replace(vector<int>) (sprinkled)/32            11.8 ns         11.7 ns     60017834
# | std::replace(vector<int>) (sprinkled)/50            18.4 ns         18.3 ns     38539889
# | std::replace(vector<int>) (sprinkled)/1024           311 ns          311 ns      2252194
# | std::replace(vector<int>) (sprinkled)/8192          2464 ns         2461 ns       280234
# | std::replace(deque<int>) (sprinkled)/32             25.4 ns         25.4 ns     27667109
# | std::replace(deque<int>) (sprinkled)/50             38.9 ns         38.8 ns     17965071
# | std::replace(deque<int>) (sprinkled)/1024            774 ns          773 ns       901736
# | std::replace(deque<int>) (sprinkled)/8192           6172 ns         6165 ns       112841
# | std::replace(list<int>) (sprinkled)/32              21.8 ns         20.9 ns     34507747
# | std::replace(list<int>) (sprinkled)/50              35.5 ns         34.6 ns     20317062
# | std::replace(list<int>) (sprinkled)/1024             978 ns          977 ns       727152
# | std::replace(list<int>) (sprinkled)/8192           10631 ns        10381 ns        66047
# | std::replace_if(vector<int>) (sprinkled)/32         20.1 ns         20.1 ns     35117996
# | std::replace_if(vector<int>) (sprinkled)/50         30.8 ns         30.8 ns     22640167
# | std::replace_if(vector<int>) (sprinkled)/1024        620 ns          618 ns      1134191
# | std::replace_if(vector<int>) (sprinkled)/8192       4911 ns         4906 ns       142338
# | std::replace_if(deque<int>) (sprinkled)/32          25.3 ns         25.3 ns     27592623
# | std::replace_if(deque<int>) (sprinkled)/50          38.9 ns         38.8 ns     17993008
# | std::replace_if(deque<int>) (sprinkled)/1024         774 ns          774 ns       905750
# | std::replace_if(deque<int>) (sprinkled)/8192        6491 ns         6347 ns       113313
# | std::replace_if(list<int>) (sprinkled)/32           26.9 ns         26.2 ns     26892461
# | std::replace_if(list<int>) (sprinkled)/50           44.4 ns         44.3 ns     15909597
# | std::replace_if(list<int>) (sprinkled)/1024         1043 ns         1042 ns       685126
# | std::replace_if(list<int>) (sprinkled)/8192        10941 ns        10930 ns        62652
# `-----------------------------
```

### After

```
# | ----------------------------------------------------------------------------------------
# | Benchmark                                              Time             CPU   Iterations
# | ----------------------------------------------------------------------------------------
# | std::replace(vector<int>) (prefix)/32               6.36 ns         6.36 ns    110303966
# | std::replace(vector<int>) (prefix)/50               10.1 ns         10.1 ns     69408638
# | std::replace(vector<int>) (prefix)/1024              184 ns          184 ns      3809006
# | std::replace(vector<int>) (prefix)/8192             1483 ns         1482 ns       471266
# | std::replace(deque<int>) (prefix)/32                6.59 ns         6.59 ns    106020447
# | std::replace(deque<int>) (prefix)/50                10.4 ns         10.4 ns     67818286
# | std::replace(deque<int>) (prefix)/1024               209 ns          200 ns      3795437
# | std::replace(deque<int>) (prefix)/8192              1512 ns         1509 ns       463518
# | std::replace(list<int>) (prefix)/32                 16.4 ns         16.4 ns     42776304
# | std::replace(list<int>) (prefix)/50                 30.1 ns         30.0 ns     23445167
# | std::replace(list<int>) (prefix)/1024                996 ns          995 ns       930579
# | std::replace(list<int>) (prefix)/8192              10266 ns        10265 ns        70900
# | std::replace_if(vector<int>) (prefix)/32            15.3 ns         15.3 ns     45883888
# | std::replace_if(vector<int>) (prefix)/50            20.7 ns         20.6 ns     33852567
# | std::replace_if(vector<int>) (prefix)/1024           422 ns          417 ns      1706963
# | std::replace_if(vector<int>) (prefix)/8192          2980 ns         2976 ns       235952
# | std::replace_if(deque<int>) (prefix)/32             15.5 ns         15.5 ns     44886757
# | std::replace_if(deque<int>) (prefix)/50             20.9 ns         20.9 ns     33456965
# | std::replace_if(deque<int>) (prefix)/1024            395 ns          395 ns      1769862
# | std::replace_if(deque<int>) (prefix)/8192           3035 ns         3035 ns       230162
# | std::replace_if(list<int>) (prefix)/32              25.3 ns         25.3 ns     27664594
# | std::replace_if(list<int>) (prefix)/50              42.9 ns         42.9 ns     16313746
# | std::replace_if(list<int>) (prefix)/1024             940 ns          940 ns       693028
# | std::replace_if(list<int>) (prefix)/8192            9969 ns         9968 ns        66946
# | std::replace(vector<int>) (sprinkled)/32            11.6 ns         11.6 ns     60118347
# | std::replace(vector<int>) (sprinkled)/50            18.3 ns         18.3 ns     38506180
# | std::replace(vector<int>) (sprinkled)/1024           313 ns          312 ns      2250717
# | std::replace(vector<int>) (sprinkled)/8192          2777 ns         2675 ns       279739
# | std::replace(deque<int>) (sprinkled)/32             12.4 ns         12.4 ns     52439956
# | std::replace(deque<int>) (sprinkled)/50             18.9 ns         18.9 ns     36837646
# | std::replace(deque<int>) (sprinkled)/1024            313 ns          313 ns      2235408
# | std::replace(deque<int>) (sprinkled)/8192           2481 ns         2480 ns       280837
# | std::replace(list<int>) (sprinkled)/32              20.0 ns         20.0 ns     35095284
# | std::replace(list<int>) (sprinkled)/50              34.3 ns         34.2 ns     20552752
# | std::replace(list<int>) (sprinkled)/1024             972 ns          971 ns       716274
# | std::replace(list<int>) (sprinkled)/8192           10372 ns        10364 ns        66868
# | std::replace_if(vector<int>) (sprinkled)/32         30.4 ns         21.3 ns     34651750
# | std::replace_if(vector<int>) (sprinkled)/50         30.9 ns         30.9 ns     22560632
# | std::replace_if(vector<int>) (sprinkled)/1024        614 ns          613 ns      1131478
# | std::replace_if(vector<int>) (sprinkled)/8192       4905 ns         4902 ns       143421
# | std::replace_if(deque<int>) (sprinkled)/32          20.5 ns         20.5 ns     34339143
# | std::replace_if(deque<int>) (sprinkled)/50          31.3 ns         31.3 ns     22280647
# | std::replace_if(deque<int>) (sprinkled)/1024         620 ns          619 ns      1130509
# | std::replace_if(deque<int>) (sprinkled)/8192        4937 ns         4936 ns       141571
# | std::replace_if(list<int>) (sprinkled)/32           25.8 ns         25.8 ns     27125264
# | std::replace_if(list<int>) (sprinkled)/50           43.9 ns         43.8 ns     15962456
# | std::replace_if(list<int>) (sprinkled)/1024         1135 ns         1087 ns       710523
# | std::replace_if(list<int>) (sprinkled)/8192        10948 ns        10928 ns        62123
# `-----------------------------
```

From 865eebaab8f18ee25afcf397896f5fc591a1128f Mon Sep 17 00:00:00 2001
From: Aryan Naraghi <aryan.naraghi at gmail.com>
Date: Sat, 23 May 2026 10:37:06 -0700
Subject: [PATCH 1/2] Reimplements std::replace and std::replace_if in terms of
 std::for_each

This improves performance for segmented iterators.
---
 libcxx/include/__algorithm/replace.h    |  7 ++++---
 libcxx/include/__algorithm/replace_if.h | 15 ++++++++++++---
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/libcxx/include/__algorithm/replace.h b/libcxx/include/__algorithm/replace.h
index 8057c78686e11..b10283b1e8b8d 100644
--- a/libcxx/include/__algorithm/replace.h
+++ b/libcxx/include/__algorithm/replace.h
@@ -9,6 +9,7 @@
 #ifndef _LIBCPP___ALGORITHM_REPLACE_H
 #define _LIBCPP___ALGORITHM_REPLACE_H
 
+#include <__algorithm/replace_if.h>
 #include <__config>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -20,9 +21,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _ForwardIterator, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 replace(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_value, const _Tp& __new_value) {
-  for (; __first != __last; ++__first)
-    if (*__first == __old_value)
-      *__first = __new_value;
+  auto __pred = [&__old_value](const auto& __curr) { return __curr == __old_value; };
+
+  std::replace_if(std::move(__first), std::move(__last), __pred, __new_value);
 }
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__algorithm/replace_if.h b/libcxx/include/__algorithm/replace_if.h
index 78487e3deed70..91ab3aa3e154e 100644
--- a/libcxx/include/__algorithm/replace_if.h
+++ b/libcxx/include/__algorithm/replace_if.h
@@ -9,7 +9,9 @@
 #ifndef _LIBCPP___ALGORITHM_REPLACE_IF_H
 #define _LIBCPP___ALGORITHM_REPLACE_IF_H
 
+#include <__algorithm/for_each.h>
 #include <__config>
+#include <__functional/identity.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -20,9 +22,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _ForwardIterator, class _Predicate, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 replace_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, const _Tp& __new_value) {
-  for (; __first != __last; ++__first)
-    if (__pred(*__first))
-      *__first = __new_value;
+  auto __apply = [&__pred, &__new_value](auto& __curr) {
+    if (__pred(__curr)) {
+      __curr = __new_value;
+    }
+  };
+
+  // We implement replace_if using __for_each to inherit its optimizations for
+  // segmented iterators. This improves performance without adding complexity.
+  __identity __proj;
+  std::__for_each(std::move(__first), std::move(__last), __apply, __proj);
 }
 
 _LIBCPP_END_NAMESPACE_STD

From eb026e7b7234b2bf2a53e9a6a938411213caef63 Mon Sep 17 00:00:00 2001
From: Aryan Naraghi <aryan.naraghi at gmail.com>
Date: Sat, 23 May 2026 10:42:43 -0700
Subject: [PATCH 2/2] Fixes some minor issues

---
 libcxx/include/__algorithm/replace.h    | 1 +
 libcxx/include/__algorithm/replace_if.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/libcxx/include/__algorithm/replace.h b/libcxx/include/__algorithm/replace.h
index b10283b1e8b8d..3e91040664072 100644
--- a/libcxx/include/__algorithm/replace.h
+++ b/libcxx/include/__algorithm/replace.h
@@ -11,6 +11,7 @@
 
 #include <__algorithm/replace_if.h>
 #include <__config>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
diff --git a/libcxx/include/__algorithm/replace_if.h b/libcxx/include/__algorithm/replace_if.h
index 91ab3aa3e154e..5825865001cbb 100644
--- a/libcxx/include/__algorithm/replace_if.h
+++ b/libcxx/include/__algorithm/replace_if.h
@@ -12,6 +12,7 @@
 #include <__algorithm/for_each.h>
 #include <__config>
 #include <__functional/identity.h>
+#include <__utility/move.h>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #  pragma GCC system_header
@@ -22,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template <class _ForwardIterator, class _Predicate, class _Tp>
 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
 replace_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, const _Tp& __new_value) {
-  auto __apply = [&__pred, &__new_value](auto& __curr) {
+  auto __apply = [&__pred, &__new_value](auto&& __curr) {
     if (__pred(__curr)) {
       __curr = __new_value;
     }



More information about the libcxx-commits mailing list