[libcxx-commits] [libcxx] [libc++] Optimize std::replace and std::replace_if for segmented iterators (PR #199364)
Aryan Naraghi via libcxx-commits
libcxx-commits at lists.llvm.org
Sat May 23 10:56:23 PDT 2026
https://github.com/aryann created https://github.com/llvm/llvm-project/pull/199364
Improves the performance of `std::replace` and `std::replace_if` for segmented iterators.
The performance improvement is achieved by reimplementing these two functions in terms of `std::__for_each`, which is specialized for segmented iterators (the idea to use `std::__for_each` where possible was proposed in #198375). Note that `std::replace` is now implemented in terms of `std::replace_if` (this idea was proposed in #102817).
This PR is a subtask of #102817.
# Improvement Summary
| Benchmark | Before (naive loop) | After (`std::__for_each`) | Speedup |
|---|---:|---:|---:|
| std::replace(vector\<int>) (prefix)/32 | 6.41 ns | 6.36 ns | 1.01X |
| std::replace(vector\<int>) (prefix)/50 | 10.2 ns | 10.1 ns | 1.01X |
| std::replace(vector\<int>) (prefix)/1024 | 189 ns | 184 ns | 1.03X |
| std::replace(vector\<int>) (prefix)/8192 | 1603 ns | 1483 ns | 1.08X |
| **std::replace(deque\<int>) (prefix)/32** | **21.2 ns** | **6.59 ns** | 🟢 **3.22X** |
| **std::replace(deque\<int>) (prefix)/50** | **28.4 ns** | **10.4 ns** | 🟢 **2.73X** |
| **std::replace(deque\<int>) (prefix)/1024** | **554 ns** | **209 ns** | 🟢 **2.65X** |
| **std::replace(deque\<int>) (prefix)/8192** | **4355 ns** | **1512 ns** | 🟢 **2.88X** |
| std::replace(list\<int>) (prefix)/32 | 16.4 ns | 16.4 ns | 1.00X |
| std::replace(list\<int>) (prefix)/50 | 30.2 ns | 30.1 ns | 1.00X |
| std::replace(list\<int>) (prefix)/1024 | 1148 ns | 996 ns | 1.15X |
| std::replace(list\<int>) (prefix)/8192 | 10206 ns | 10266 ns | 0.99X |
| std::replace_if(vector\<int>) (prefix)/32 | 15.2 ns | 15.3 ns | 0.99X |
| std::replace_if(vector\<int>) (prefix)/50 | 22.1 ns | 20.7 ns | 1.07X |
| std::replace_if(vector\<int>) (prefix)/1024 | 421 ns | 422 ns | 1.00X |
| std::replace_if(vector\<int>) (prefix)/8192 | 3097 ns | 2980 ns | 1.04X |
| **std::replace_if(deque\<int>) (prefix)/32** | **22.6 ns** | **15.5 ns** | 🟢 **1.46X** |
| **std::replace_if(deque\<int>) (prefix)/50** | **31.5 ns** | **20.9 ns** | 🟢 **1.51X** |
| **std::replace_if(deque\<int>) (prefix)/1024** | **562 ns** | **395 ns** | 🟢 **1.42X** |
| **std::replace_if(deque\<int>) (prefix)/8192** | **4386 ns** | **3035 ns** | 🟢 **1.45X** |
| std::replace_if(list\<int>) (prefix)/32 | 26.4 ns | 25.3 ns | 1.04X |
| std::replace_if(list\<int>) (prefix)/50 | 44.1 ns | 42.9 ns | 1.03X |
| std::replace_if(list\<int>) (prefix)/1024 | 1021 ns | 940 ns | 1.09X |
| std::replace_if(list\<int>) (prefix)/8192 | 10041 ns | 9969 ns | 1.01X |
| std::replace(vector\<int>) (sprinkled)/32 | 11.8 ns | 11.6 ns | 1.02X |
| std::replace(vector\<int>) (sprinkled)/50 | 18.4 ns | 18.3 ns | 1.01X |
| std::replace(vector\<int>) (sprinkled)/1024 | 311 ns | 313 ns | 0.99X |
| std::replace(vector\<int>) (sprinkled)/8192 | 2464 ns | 2777 ns | 0.89X |
| **std::replace(deque\<int>) (sprinkled)/32** | **25.4 ns** | **12.4 ns** | 🟢 **2.05X** |
| **std::replace(deque\<int>) (sprinkled)/50** | **38.9 ns** | **18.9 ns** | 🟢 **2.06X** |
| **std::replace(deque\<int>) (sprinkled)/1024** | **774 ns** | **313 ns** | 🟢 **2.47X** |
| **std::replace(deque\<int>) (sprinkled)/8192** | **6172 ns** | **2481 ns** | 🟢 **2.49X** |
| std::replace(list\<int>) (sprinkled)/32 | 21.8 ns | 20.0 ns | 1.09X |
| std::replace(list\<int>) (sprinkled)/50 | 35.5 ns | 34.3 ns | 1.03X |
| std::replace(list\<int>) (sprinkled)/1024 | 978 ns | 972 ns | 1.01X |
| std::replace(list\<int>) (sprinkled)/8192 | 10631 ns | 10372 ns | 1.02X |
| std::replace_if(vector\<int>) (sprinkled)/32 | 20.1 ns | 30.4 ns | 0.66X |
| std::replace_if(vector\<int>) (sprinkled)/50 | 30.8 ns | 30.9 ns | 1.00X |
| std::replace_if(vector\<int>) (sprinkled)/1024 | 620 ns | 614 ns | 1.01X |
| std::replace_if(vector\<int>) (sprinkled)/8192 | 4911 ns | 4905 ns | 1.00X |
| **std::replace_if(deque\<int>) (sprinkled)/32** | **25.3 ns** | **20.5 ns** | 🟢 **1.23X** |
| **std::replace_if(deque\<int>) (sprinkled)/50** | **38.9 ns** | **31.3 ns** | 🟢 **1.24X** |
| **std::replace_if(deque\<int>) (sprinkled)/1024** | **774 ns** | **620 ns** | 🟢 **1.25X** |
| **std::replace_if(deque\<int>) (sprinkled)/8192** | **6491 ns** | **4937 ns** | 🟢 **1.31X** |
| std::replace_if(list\<int>) (sprinkled)/32 | 26.9 ns | 25.8 ns | 1.04X |
| std::replace_if(list\<int>) (sprinkled)/50 | 44.4 ns | 43.9 ns | 1.01X |
| std::replace_if(list\<int>) (sprinkled)/1024 | 1043 ns | 1135 ns | 0.92X |
| std::replace_if(list\<int>) (sprinkled)/8192 | 10941 ns | 10948 ns | 1.00X |
# Raw Data
## `libcxx/test/benchmarks/algorithms/modifying/replace.bench.cpp`
### Before
```
# | ----------------------------------------------------------------------------------------
# | Benchmark Time CPU Iterations
# | ----------------------------------------------------------------------------------------
# | std::replace(vector<int>) (prefix)/32 6.41 ns 6.39 ns 107958051
# | std::replace(vector<int>) (prefix)/50 10.2 ns 10.2 ns 68465684
# | std::replace(vector<int>) (prefix)/1024 189 ns 188 ns 3547375
# | std::replace(vector<int>) (prefix)/8192 1603 ns 1531 ns 462850
# | std::replace(deque<int>) (prefix)/32 21.2 ns 21.2 ns 33579423
# | std::replace(deque<int>) (prefix)/50 28.4 ns 28.4 ns 24105015
# | std::replace(deque<int>) (prefix)/1024 554 ns 554 ns 1261057
# | std::replace(deque<int>) (prefix)/8192 4355 ns 4355 ns 160651
# | std::replace(list<int>) (prefix)/32 16.4 ns 16.4 ns 42930036
# | std::replace(list<int>) (prefix)/50 30.2 ns 30.2 ns 23241143
# | std::replace(list<int>) (prefix)/1024 1148 ns 1060 ns 775022
# | std::replace(list<int>) (prefix)/8192 10206 ns 10139 ns 69798
# | std::replace_if(vector<int>) (prefix)/32 15.2 ns 15.2 ns 46224758
# | std::replace_if(vector<int>) (prefix)/50 22.1 ns 21.3 ns 33588284
# | std::replace_if(vector<int>) (prefix)/1024 421 ns 420 ns 1657083
# | std::replace_if(vector<int>) (prefix)/8192 3097 ns 3013 ns 237578
# | std::replace_if(deque<int>) (prefix)/32 22.6 ns 22.2 ns 32460005
# | std::replace_if(deque<int>) (prefix)/50 31.5 ns 31.5 ns 23215861
# | std::replace_if(deque<int>) (prefix)/1024 562 ns 562 ns 1242501
# | std::replace_if(deque<int>) (prefix)/8192 4386 ns 4381 ns 159525
# | std::replace_if(list<int>) (prefix)/32 26.4 ns 25.8 ns 26716945
# | std::replace_if(list<int>) (prefix)/50 44.1 ns 43.4 ns 16366995
# | std::replace_if(list<int>) (prefix)/1024 1021 ns 1019 ns 681922
# | std::replace_if(list<int>) (prefix)/8192 10041 ns 10034 ns 66707
# | std::replace(vector<int>) (sprinkled)/32 11.8 ns 11.7 ns 60017834
# | std::replace(vector<int>) (sprinkled)/50 18.4 ns 18.3 ns 38539889
# | std::replace(vector<int>) (sprinkled)/1024 311 ns 311 ns 2252194
# | std::replace(vector<int>) (sprinkled)/8192 2464 ns 2461 ns 280234
# | std::replace(deque<int>) (sprinkled)/32 25.4 ns 25.4 ns 27667109
# | std::replace(deque<int>) (sprinkled)/50 38.9 ns 38.8 ns 17965071
# | std::replace(deque<int>) (sprinkled)/1024 774 ns 773 ns 901736
# | std::replace(deque<int>) (sprinkled)/8192 6172 ns 6165 ns 112841
# | std::replace(list<int>) (sprinkled)/32 21.8 ns 20.9 ns 34507747
# | std::replace(list<int>) (sprinkled)/50 35.5 ns 34.6 ns 20317062
# | std::replace(list<int>) (sprinkled)/1024 978 ns 977 ns 727152
# | std::replace(list<int>) (sprinkled)/8192 10631 ns 10381 ns 66047
# | std::replace_if(vector<int>) (sprinkled)/32 20.1 ns 20.1 ns 35117996
# | std::replace_if(vector<int>) (sprinkled)/50 30.8 ns 30.8 ns 22640167
# | std::replace_if(vector<int>) (sprinkled)/1024 620 ns 618 ns 1134191
# | std::replace_if(vector<int>) (sprinkled)/8192 4911 ns 4906 ns 142338
# | std::replace_if(deque<int>) (sprinkled)/32 25.3 ns 25.3 ns 27592623
# | std::replace_if(deque<int>) (sprinkled)/50 38.9 ns 38.8 ns 17993008
# | std::replace_if(deque<int>) (sprinkled)/1024 774 ns 774 ns 905750
# | std::replace_if(deque<int>) (sprinkled)/8192 6491 ns 6347 ns 113313
# | std::replace_if(list<int>) (sprinkled)/32 26.9 ns 26.2 ns 26892461
# | std::replace_if(list<int>) (sprinkled)/50 44.4 ns 44.3 ns 15909597
# | std::replace_if(list<int>) (sprinkled)/1024 1043 ns 1042 ns 685126
# | std::replace_if(list<int>) (sprinkled)/8192 10941 ns 10930 ns 62652
# `-----------------------------
```
### After
```
# | ----------------------------------------------------------------------------------------
# | Benchmark Time CPU Iterations
# | ----------------------------------------------------------------------------------------
# | std::replace(vector<int>) (prefix)/32 6.36 ns 6.36 ns 110303966
# | std::replace(vector<int>) (prefix)/50 10.1 ns 10.1 ns 69408638
# | std::replace(vector<int>) (prefix)/1024 184 ns 184 ns 3809006
# | std::replace(vector<int>) (prefix)/8192 1483 ns 1482 ns 471266
# | std::replace(deque<int>) (prefix)/32 6.59 ns 6.59 ns 106020447
# | std::replace(deque<int>) (prefix)/50 10.4 ns 10.4 ns 67818286
# | std::replace(deque<int>) (prefix)/1024 209 ns 200 ns 3795437
# | std::replace(deque<int>) (prefix)/8192 1512 ns 1509 ns 463518
# | std::replace(list<int>) (prefix)/32 16.4 ns 16.4 ns 42776304
# | std::replace(list<int>) (prefix)/50 30.1 ns 30.0 ns 23445167
# | std::replace(list<int>) (prefix)/1024 996 ns 995 ns 930579
# | std::replace(list<int>) (prefix)/8192 10266 ns 10265 ns 70900
# | std::replace_if(vector<int>) (prefix)/32 15.3 ns 15.3 ns 45883888
# | std::replace_if(vector<int>) (prefix)/50 20.7 ns 20.6 ns 33852567
# | std::replace_if(vector<int>) (prefix)/1024 422 ns 417 ns 1706963
# | std::replace_if(vector<int>) (prefix)/8192 2980 ns 2976 ns 235952
# | std::replace_if(deque<int>) (prefix)/32 15.5 ns 15.5 ns 44886757
# | std::replace_if(deque<int>) (prefix)/50 20.9 ns 20.9 ns 33456965
# | std::replace_if(deque<int>) (prefix)/1024 395 ns 395 ns 1769862
# | std::replace_if(deque<int>) (prefix)/8192 3035 ns 3035 ns 230162
# | std::replace_if(list<int>) (prefix)/32 25.3 ns 25.3 ns 27664594
# | std::replace_if(list<int>) (prefix)/50 42.9 ns 42.9 ns 16313746
# | std::replace_if(list<int>) (prefix)/1024 940 ns 940 ns 693028
# | std::replace_if(list<int>) (prefix)/8192 9969 ns 9968 ns 66946
# | std::replace(vector<int>) (sprinkled)/32 11.6 ns 11.6 ns 60118347
# | std::replace(vector<int>) (sprinkled)/50 18.3 ns 18.3 ns 38506180
# | std::replace(vector<int>) (sprinkled)/1024 313 ns 312 ns 2250717
# | std::replace(vector<int>) (sprinkled)/8192 2777 ns 2675 ns 279739
# | std::replace(deque<int>) (sprinkled)/32 12.4 ns 12.4 ns 52439956
# | std::replace(deque<int>) (sprinkled)/50 18.9 ns 18.9 ns 36837646
# | std::replace(deque<int>) (sprinkled)/1024 313 ns 313 ns 2235408
# | std::replace(deque<int>) (sprinkled)/8192 2481 ns 2480 ns 280837
# | std::replace(list<int>) (sprinkled)/32 20.0 ns 20.0 ns 35095284
# | std::replace(list<int>) (sprinkled)/50 34.3 ns 34.2 ns 20552752
# | std::replace(list<int>) (sprinkled)/1024 972 ns 971 ns 716274
# | std::replace(list<int>) (sprinkled)/8192 10372 ns 10364 ns 66868
# | std::replace_if(vector<int>) (sprinkled)/32 30.4 ns 21.3 ns 34651750
# | std::replace_if(vector<int>) (sprinkled)/50 30.9 ns 30.9 ns 22560632
# | std::replace_if(vector<int>) (sprinkled)/1024 614 ns 613 ns 1131478
# | std::replace_if(vector<int>) (sprinkled)/8192 4905 ns 4902 ns 143421
# | std::replace_if(deque<int>) (sprinkled)/32 20.5 ns 20.5 ns 34339143
# | std::replace_if(deque<int>) (sprinkled)/50 31.3 ns 31.3 ns 22280647
# | std::replace_if(deque<int>) (sprinkled)/1024 620 ns 619 ns 1130509
# | std::replace_if(deque<int>) (sprinkled)/8192 4937 ns 4936 ns 141571
# | std::replace_if(list<int>) (sprinkled)/32 25.8 ns 25.8 ns 27125264
# | std::replace_if(list<int>) (sprinkled)/50 43.9 ns 43.8 ns 15962456
# | std::replace_if(list<int>) (sprinkled)/1024 1135 ns 1087 ns 710523
# | std::replace_if(list<int>) (sprinkled)/8192 10948 ns 10928 ns 62123
# `-----------------------------
```
>From 865eebaab8f18ee25afcf397896f5fc591a1128f Mon Sep 17 00:00:00 2001
From: Aryan Naraghi <aryan.naraghi at gmail.com>
Date: Sat, 23 May 2026 10:37:06 -0700
Subject: [PATCH 1/2] Reimplements std::replace and std::replace_if in terms of
std::for_each
This improves performance for segmented iterators.
---
libcxx/include/__algorithm/replace.h | 7 ++++---
libcxx/include/__algorithm/replace_if.h | 15 ++++++++++++---
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/libcxx/include/__algorithm/replace.h b/libcxx/include/__algorithm/replace.h
index 8057c78686e11..b10283b1e8b8d 100644
--- a/libcxx/include/__algorithm/replace.h
+++ b/libcxx/include/__algorithm/replace.h
@@ -9,6 +9,7 @@
#ifndef _LIBCPP___ALGORITHM_REPLACE_H
#define _LIBCPP___ALGORITHM_REPLACE_H
+#include <__algorithm/replace_if.h>
#include <__config>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -20,9 +21,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
replace(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_value, const _Tp& __new_value) {
- for (; __first != __last; ++__first)
- if (*__first == __old_value)
- *__first = __new_value;
+ auto __pred = [&__old_value](const auto& __curr) { return __curr == __old_value; };
+
+ std::replace_if(std::move(__first), std::move(__last), __pred, __new_value);
}
_LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__algorithm/replace_if.h b/libcxx/include/__algorithm/replace_if.h
index 78487e3deed70..91ab3aa3e154e 100644
--- a/libcxx/include/__algorithm/replace_if.h
+++ b/libcxx/include/__algorithm/replace_if.h
@@ -9,7 +9,9 @@
#ifndef _LIBCPP___ALGORITHM_REPLACE_IF_H
#define _LIBCPP___ALGORITHM_REPLACE_IF_H
+#include <__algorithm/for_each.h>
#include <__config>
+#include <__functional/identity.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -20,9 +22,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Predicate, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
replace_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, const _Tp& __new_value) {
- for (; __first != __last; ++__first)
- if (__pred(*__first))
- *__first = __new_value;
+ auto __apply = [&__pred, &__new_value](auto& __curr) {
+ if (__pred(__curr)) {
+ __curr = __new_value;
+ }
+ };
+
+ // We implement replace_if using __for_each to inherit its optimizations for
+ // segmented iterators. This improves performance without adding complexity.
+ __identity __proj;
+ std::__for_each(std::move(__first), std::move(__last), __apply, __proj);
}
_LIBCPP_END_NAMESPACE_STD
>From eb026e7b7234b2bf2a53e9a6a938411213caef63 Mon Sep 17 00:00:00 2001
From: Aryan Naraghi <aryan.naraghi at gmail.com>
Date: Sat, 23 May 2026 10:42:43 -0700
Subject: [PATCH 2/2] Fixes some minor issues
---
libcxx/include/__algorithm/replace.h | 1 +
libcxx/include/__algorithm/replace_if.h | 3 ++-
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/libcxx/include/__algorithm/replace.h b/libcxx/include/__algorithm/replace.h
index b10283b1e8b8d..3e91040664072 100644
--- a/libcxx/include/__algorithm/replace.h
+++ b/libcxx/include/__algorithm/replace.h
@@ -11,6 +11,7 @@
#include <__algorithm/replace_if.h>
#include <__config>
+#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
diff --git a/libcxx/include/__algorithm/replace_if.h b/libcxx/include/__algorithm/replace_if.h
index 91ab3aa3e154e..5825865001cbb 100644
--- a/libcxx/include/__algorithm/replace_if.h
+++ b/libcxx/include/__algorithm/replace_if.h
@@ -12,6 +12,7 @@
#include <__algorithm/for_each.h>
#include <__config>
#include <__functional/identity.h>
+#include <__utility/move.h>
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
@@ -22,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
template <class _ForwardIterator, class _Predicate, class _Tp>
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void
replace_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred, const _Tp& __new_value) {
- auto __apply = [&__pred, &__new_value](auto& __curr) {
+ auto __apply = [&__pred, &__new_value](auto&& __curr) {
if (__pred(__curr)) {
__curr = __new_value;
}
More information about the libcxx-commits
mailing list