[libcxx-commits] [libcxx] 92bd81a - [libc++][PSTL] Copy the headers into libc++

Nikolas Klauser via libcxx-commits libcxx-commits at lists.llvm.org
Fri Apr 21 02:21:45 PDT 2023


Author: Nikolas Klauser
Date: 2023-04-21T11:21:33+02:00
New Revision: 92bd81a2be23bada594f94578b3a45413433a789

URL: https://github.com/llvm/llvm-project/commit/92bd81a2be23bada594f94578b3a45413433a789
DIFF: https://github.com/llvm/llvm-project/commit/92bd81a2be23bada594f94578b3a45413433a789.diff

LOG: [libc++][PSTL] Copy the headers into libc++

We decided to integrate the PSTL into our own headers and only share the backend implementations. This is a first step in that direction; specifically, it copies the PSTL headers into the libc++ structure.

Reviewed By: ldionne, #libc

Spies: rodgert, mikhail.ramalho, jplehr, bcain, h-vetinari, Mordante, rarutyun, var-const, sstefan1, pcwang-thead, libcxx-commits, arichardson, mgrang, miyuki

Differential Revision: https://reviews.llvm.org/D141779

Added: 
    libcxx/include/__pstl_algorithm
    libcxx/include/__pstl_config_site.in
    libcxx/include/__pstl_execution
    libcxx/include/__pstl_memory
    libcxx/include/__pstl_numeric
    libcxx/include/pstl/internal/algorithm_fwd.h
    libcxx/include/pstl/internal/algorithm_impl.h
    libcxx/include/pstl/internal/execution_defs.h
    libcxx/include/pstl/internal/execution_impl.h
    libcxx/include/pstl/internal/glue_algorithm_defs.h
    libcxx/include/pstl/internal/glue_algorithm_impl.h
    libcxx/include/pstl/internal/glue_execution_defs.h
    libcxx/include/pstl/internal/glue_memory_defs.h
    libcxx/include/pstl/internal/glue_memory_impl.h
    libcxx/include/pstl/internal/glue_numeric_defs.h
    libcxx/include/pstl/internal/glue_numeric_impl.h
    libcxx/include/pstl/internal/memory_impl.h
    libcxx/include/pstl/internal/numeric_fwd.h
    libcxx/include/pstl/internal/numeric_impl.h
    libcxx/include/pstl/internal/omp/parallel_for.h
    libcxx/include/pstl/internal/omp/parallel_for_each.h
    libcxx/include/pstl/internal/omp/parallel_invoke.h
    libcxx/include/pstl/internal/omp/parallel_merge.h
    libcxx/include/pstl/internal/omp/parallel_reduce.h
    libcxx/include/pstl/internal/omp/parallel_scan.h
    libcxx/include/pstl/internal/omp/parallel_stable_partial_sort.h
    libcxx/include/pstl/internal/omp/parallel_stable_sort.h
    libcxx/include/pstl/internal/omp/parallel_transform_reduce.h
    libcxx/include/pstl/internal/omp/parallel_transform_scan.h
    libcxx/include/pstl/internal/omp/util.h
    libcxx/include/pstl/internal/parallel_backend.h
    libcxx/include/pstl/internal/parallel_backend_omp.h
    libcxx/include/pstl/internal/parallel_backend_serial.h
    libcxx/include/pstl/internal/parallel_backend_tbb.h
    libcxx/include/pstl/internal/parallel_backend_utils.h
    libcxx/include/pstl/internal/parallel_impl.h
    libcxx/include/pstl/internal/pstl_config.h
    libcxx/include/pstl/internal/unseq_backend_simd.h
    libcxx/include/pstl/internal/utils.h

Modified: 
    libcxx/test/libcxx/lint/lint_headers.sh.py
    libcxx/test/libcxx/private_headers.verify.cpp
    libcxx/utils/data/ignore_format.txt
    libcxx/utils/generate_iwyu_mapping.py

Removed: 
    


################################################################################
diff --git a/libcxx/include/__pstl_algorithm b/libcxx/include/__pstl_algorithm
new file mode 100644
index 0000000000000..79c18385c5a55
--- /dev/null
+++ b/libcxx/include/__pstl_algorithm
@@ -0,0 +1,15 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __PSTL_ALGORITHM
+#define __PSTL_ALGORITHM
+
+#include <pstl/internal/glue_algorithm_impl.h>
+
+#endif /* __PSTL_ALGORITHM */

diff --git a/libcxx/include/__pstl_config_site.in b/libcxx/include/__pstl_config_site.in
new file mode 100644
index 0000000000000..a41a1c383a45b
--- /dev/null
+++ b/libcxx/include/__pstl_config_site.in
@@ -0,0 +1,17 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __PSTL_CONFIG_SITE
+#define __PSTL_CONFIG_SITE
+
+#cmakedefine _PSTL_PAR_BACKEND_SERIAL
+#cmakedefine _PSTL_PAR_BACKEND_TBB
+#cmakedefine _PSTL_PAR_BACKEND_OPENMP
+#cmakedefine _PSTL_HIDE_FROM_ABI_PER_TU
+
+#endif // __PSTL_CONFIG_SITE

diff --git a/libcxx/include/__pstl_execution b/libcxx/include/__pstl_execution
new file mode 100644
index 0000000000000..0e2cd44561cdf
--- /dev/null
+++ b/libcxx/include/__pstl_execution
@@ -0,0 +1,15 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __PSTL_EXECUTION
+#define __PSTL_EXECUTION
+
+#include <pstl/internal/glue_execution_defs.h>
+
+#endif /* __PSTL_EXECUTION */

diff --git a/libcxx/include/__pstl_memory b/libcxx/include/__pstl_memory
new file mode 100644
index 0000000000000..12b7f5aa3c04b
--- /dev/null
+++ b/libcxx/include/__pstl_memory
@@ -0,0 +1,15 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __PSTL_MEMORY
+#define __PSTL_MEMORY
+
+#include <pstl/internal/glue_memory_impl.h>
+
+#endif /* __PSTL_MEMORY */

diff --git a/libcxx/include/__pstl_numeric b/libcxx/include/__pstl_numeric
new file mode 100644
index 0000000000000..cf168ef7053ba
--- /dev/null
+++ b/libcxx/include/__pstl_numeric
@@ -0,0 +1,15 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __PSTL_NUMERIC
+#define __PSTL_NUMERIC
+
+#include <pstl/internal/glue_numeric_impl.h>
+
+#endif /* __PSTL_NUMERIC */

diff --git a/libcxx/include/pstl/internal/algorithm_fwd.h b/libcxx/include/pstl/internal/algorithm_fwd.h
new file mode 100644
index 0000000000000..3dcf9e9f5e9eb
--- /dev/null
+++ b/libcxx/include/pstl/internal/algorithm_fwd.h
@@ -0,0 +1,1202 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_ALGORITHM_FWD_H
+#define _PSTL_ALGORITHM_FWD_H
+
+#include <iterator>
+#include <type_traits>
+#include <utility>
+
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+//------------------------------------------------------------------------
+// any_of
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Pred>
+bool
+__brick_any_of(const _ForwardIterator, const _ForwardIterator, _Pred,
+               /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Pred>
+bool
+__brick_any_of(const _RandomAccessIterator, const _RandomAccessIterator, _Pred,
+               /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Pred>
+bool
+__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Pred) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Pred>
+bool
+__pattern_any_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Pred);
+
+//------------------------------------------------------------------------
+// walk1 (pseudo)
+//
+// walk1 evaluates f(x) for each dereferenced value x drawn from [first,last)
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Function>
+void __brick_walk1(_ForwardIterator, _ForwardIterator, _Function,
+                   /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Function>
+void __brick_walk1(_RandomAccessIterator, _RandomAccessIterator, _Function,
+                   /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Function>
+void
+__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Function) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Function>
+void
+__pattern_walk1(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Function);
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Brick>
+void
+__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Brick) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Brick>
+void
+__pattern_walk_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                     _Brick);
+
+//------------------------------------------------------------------------
+// walk1_n
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Size, class _Function>
+_ForwardIterator __brick_walk1_n(_ForwardIterator, _Size, _Function,
+                                 /*_IsVectorTag=*/std::false_type);
+
+template <class _RandomAccessIterator, class _DifferenceType, class _Function>
+_RandomAccessIterator __brick_walk1_n(_RandomAccessIterator, _DifferenceType, _Function,
+                                      /*vectorTag=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Function>
+_ForwardIterator
+__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Function) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Function>
+_RandomAccessIterator
+__pattern_walk1_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Function);
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Brick>
+_ForwardIterator
+__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _Size, _Brick) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Brick>
+_RandomAccessIterator
+__pattern_walk_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, _Brick);
+
+//------------------------------------------------------------------------
+// walk2 (pseudo)
+//
+// walk2 evaluates f(x,y) for dereferenced values (x,y) drawn from [first1,last1) and [first2,...)
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Function>
+_ForwardIterator2 __brick_walk2(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function,
+                                /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Function>
+_RandomAccessIterator2 __brick_walk2(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Function,
+                                     /*vector=*/std::true_type) noexcept;
+
+template <class _ForwardIterator1, class _Size, class _ForwardIterator2, class _Function>
+_ForwardIterator2 __brick_walk2_n(_ForwardIterator1, _Size, _ForwardIterator2, _Function,
+                                  /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _Size, class _RandomAccessIterator2, class _Function>
+_RandomAccessIterator2 __brick_walk2_n(_RandomAccessIterator1, _Size, _RandomAccessIterator2, _Function,
+                                       /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Function>
+_ForwardIterator2
+__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Function) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Function>
+_RandomAccessIterator2
+__pattern_walk2(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                _RandomAccessIterator2, _Function);
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2,
+          class _Function>
+_ForwardIterator2
+__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Function) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _Size,
+          class _RandomAccessIterator2, class _Function>
+_RandomAccessIterator2
+__pattern_walk2_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size, _RandomAccessIterator2,
+                  _Function);
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Brick>
+_ForwardIterator2
+__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                      _Brick) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Brick>
+_RandomAccessIterator2
+__pattern_walk2_brick(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                      _RandomAccessIterator2, _Brick);
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2,
+          class _Brick>
+_ForwardIterator2
+__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _Size, _ForwardIterator2, _Brick) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _Size,
+          class _RandomAccessIterator2, class _Brick>
+_RandomAccessIterator2
+__pattern_walk2_brick_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _Size,
+                        _RandomAccessIterator2, _Brick);
+
+//------------------------------------------------------------------------
+// walk3 (pseudo)
+//
+// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator3, class _Function>
+_ForwardIterator3 __brick_walk3(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3, _Function,
+                                /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3, class _Function>
+_RandomAccessIterator3 __brick_walk3(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                     _RandomAccessIterator3, _Function,
+                                     /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator3,
+          class _Function>
+_ForwardIterator3
+__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator3,
+                _Function) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _Function>
+_RandomAccessIterator3
+__pattern_walk3(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                _RandomAccessIterator2, _RandomAccessIterator3, _Function);
+
+//------------------------------------------------------------------------
+// equal
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _BinaryPredicate,
+                   /* is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+bool __brick_equal(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _BinaryPredicate,
+                   /* is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool
+__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+bool
+__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                _RandomAccessIterator2, _BinaryPredicate);
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool __brick_equal(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2, _BinaryPredicate,
+                   /* is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+bool __brick_equal(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2,
+                   _BinaryPredicate, /* is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool
+__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+bool
+__pattern_equal(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate);
+
+//------------------------------------------------------------------------
+// find_if
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Predicate>
+_ForwardIterator __brick_find_if(_ForwardIterator, _ForwardIterator, _Predicate,
+                                 /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Predicate>
+_RandomAccessIterator __brick_find_if(_RandomAccessIterator, _RandomAccessIterator, _Predicate,
+                                      /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+_ForwardIterator
+__pattern_find_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Predicate>
+_RandomAccessIterator
+__pattern_find_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                  _Predicate);
+
+//------------------------------------------------------------------------
+// find_end
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1 __brick_find_end(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                   _BinaryPredicate,
+                                   /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1 __brick_find_end(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                        _RandomAccessIterator2, _BinaryPredicate,
+                                        /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                   _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+_RandomAccessIterator1
+__pattern_find_end(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                   _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// find_first_of
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1 __brick_find_first_of(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                        _BinaryPredicate,
+                                        /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1 __brick_find_first_of(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                             _RandomAccessIterator2, _BinaryPredicate,
+                                             /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                        _ForwardIterator2, _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+_RandomAccessIterator1
+__pattern_find_first_of(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                        _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// search
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1 __brick_search(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                 _BinaryPredicate,
+                                 /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1 __brick_search(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                      _RandomAccessIterator2, _BinaryPredicate,
+                                      /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                 _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+_RandomAccessIterator1
+__pattern_search(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                 _RandomAccessIterator2, _RandomAccessIterator2, _BinaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// search_n
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+_ForwardIterator
+__brick_search_n(_ForwardIterator, _ForwardIterator, _Size, const _Tp&, _BinaryPredicate,
+                 /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Size, class _Tp, class _BinaryPredicate>
+_RandomAccessIterator
+__brick_search_n(_RandomAccessIterator, _RandomAccessIterator, _Size, const _Tp&, _BinaryPredicate,
+                 /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+_ForwardIterator
+__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Size, const _Tp&,
+                   _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Tp,
+          class _BinaryPredicate>
+_RandomAccessIterator
+__pattern_search_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Size,
+                   const _Tp&, _BinaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// copy_n
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Size, class _OutputIterator>
+_OutputIterator __brick_copy_n(_ForwardIterator, _Size, _OutputIterator,
+                               /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Size, class _OutputIterator>
+_OutputIterator __brick_copy_n(_RandomAccessIterator, _Size, _OutputIterator,
+                               /*vector=*/std::true_type) noexcept;
+
+//------------------------------------------------------------------------
+// copy
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator __brick_copy(_ForwardIterator, _ForwardIterator, _OutputIterator,
+                             /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator __brick_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator,
+                             /*vector=*/std::true_type) noexcept;
+
+//------------------------------------------------------------------------
+// move
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator __brick_move(_ForwardIterator, _ForwardIterator, _OutputIterator,
+                             /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator __brick_move(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator,
+                             /*vector=*/std::true_type) noexcept;
+
+//------------------------------------------------------------------------
+// swap_ranges
+//------------------------------------------------------------------------
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator __brick_swap_ranges(_ForwardIterator, _ForwardIterator, _OutputIterator,
+                                    /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator __brick_swap_ranges(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator,
+                                    /*vector=*/std::true_type) noexcept;
+
+//------------------------------------------------------------------------
+// copy_if
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
+_OutputIterator __brick_copy_if(_ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryPredicate,
+                                /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator, class _UnaryPredicate>
+_OutputIterator __brick_copy_if(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _UnaryPredicate,
+                                /*vector=*/std::true_type) noexcept;
+
+template <class _DifferenceType, class _ForwardIterator, class _UnaryPredicate>
+std::pair<_DifferenceType, _DifferenceType>
+__brick_calc_mask_1(_ForwardIterator, _ForwardIterator, bool* __restrict, _UnaryPredicate,
+                    /*vector=*/std::false_type) noexcept;
+template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
+std::pair<_DifferenceType, _DifferenceType>
+__brick_calc_mask_1(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _UnaryPredicate,
+                    /*vector=*/std::true_type) noexcept;
+
+template <class _ForwardIterator, class _OutputIterator>
+void
+__brick_copy_by_mask(_ForwardIterator, _ForwardIterator, _OutputIterator, bool*,
+                     /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator>
+void
+__brick_copy_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, bool* __restrict,
+                     /*vector=*/std::true_type) noexcept;
+
+template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2>
+void
+__brick_partition_by_mask(_ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, bool*,
+                          /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator1, class _OutputIterator2>
+void
+__brick_partition_by_mask(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator1, _OutputIterator2, bool*,
+                          /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
+_OutputIterator
+__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
+                  _UnaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _UnaryPredicate>
+_OutputIterator
+__pattern_copy_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                  _OutputIterator, _UnaryPredicate);
+
+//------------------------------------------------------------------------
+// count
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _Predicate>
+typename std::iterator_traits<_RandomAccessIterator>::difference_type
+    __brick_count(_RandomAccessIterator, _RandomAccessIterator, _Predicate,
+                  /* is_vector = */ std::true_type) noexcept;
+
+template <class _ForwardIterator, class _Predicate>
+typename std::iterator_traits<_ForwardIterator>::difference_type
+    __brick_count(_ForwardIterator, _ForwardIterator, _Predicate,
+                  /* is_vector = */ std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+typename std::iterator_traits<_ForwardIterator>::difference_type
+__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Predicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Predicate>
+typename std::iterator_traits<_RandomAccessIterator>::difference_type
+__pattern_count(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                _Predicate);
+
+//------------------------------------------------------------------------
+// unique
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _BinaryPredicate>
+_ForwardIterator __brick_unique(_ForwardIterator, _ForwardIterator, _BinaryPredicate,
+                                /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _BinaryPredicate>
+_RandomAccessIterator __brick_unique(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate,
+                                     /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+_ForwardIterator
+__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _BinaryPredicate>
+_RandomAccessIterator
+__pattern_unique(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                 _BinaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// unique_copy
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class OutputIterator, class _BinaryPredicate>
+OutputIterator __brick_unique_copy(_ForwardIterator, _ForwardIterator, OutputIterator, _BinaryPredicate,
+                                   /*vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator, class _BinaryPredicate>
+_OutputIterator __brick_unique_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator, _BinaryPredicate,
+                                    /*vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryPredicate>
+_OutputIterator
+__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
+                      _BinaryPredicate) noexcept;
+
+template <class _ExecutionPolicy, class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
+_DifferenceType
+__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate,
+                    /*vector=*/std::false_type) noexcept;
+
+template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate>
+_DifferenceType
+__brick_calc_mask_2(_RandomAccessIterator, _RandomAccessIterator, bool* __restrict, _BinaryPredicate,
+                    /*vector=*/std::true_type) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _BinaryPredicate>
+_OutputIterator
+__pattern_unique_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                      _OutputIterator, _BinaryPredicate);
+
+//------------------------------------------------------------------------
+// reverse
+//------------------------------------------------------------------------
+
+template <class _BidirectionalIterator>
+void __brick_reverse(_BidirectionalIterator, _BidirectionalIterator,
+                     /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator>
+void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator,
+                     /*__is_vector=*/std::true_type) noexcept;
+
+template <class _BidirectionalIterator>
+void __brick_reverse(_BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator,
+                     /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator>
+void __brick_reverse(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+                     /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator>
+void
+__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator>
+void
+__pattern_reverse(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator);
+
+//------------------------------------------------------------------------
+// reverse_copy
+//------------------------------------------------------------------------
+
+template <class _BidirectionalIterator, class _OutputIterator>
+_OutputIterator __brick_reverse_copy(_BidirectionalIterator, _BidirectionalIterator, _OutputIterator,
+                                     /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator __brick_reverse_copy(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator,
+                                     /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator, class _OutputIterator>
+_OutputIterator
+__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator,
+                       _OutputIterator) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator
+__pattern_reverse_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                       _OutputIterator);
+
+//------------------------------------------------------------------------
+// rotate
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator>
+_ForwardIterator __brick_rotate(_ForwardIterator, _ForwardIterator, _ForwardIterator,
+                                /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator>
+_RandomAccessIterator __brick_rotate(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+                                     /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator>
+_ForwardIterator
+__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator>
+_RandomAccessIterator
+__pattern_rotate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                 _RandomAccessIterator);
+
+//------------------------------------------------------------------------
+// rotate_copy
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator __brick_rotate_copy(_ForwardIterator, _ForwardIterator, _ForwardIterator, _OutputIterator,
+                                    /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator __brick_rotate_copy(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+                                    _OutputIterator,
+                                    /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator>
+_OutputIterator
+__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _ForwardIterator,
+                      _OutputIterator) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator>
+_OutputIterator
+__pattern_rotate_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                      _RandomAccessIterator, _OutputIterator);
+
+//------------------------------------------------------------------------
+// is_partitioned
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _UnaryPredicate>
+bool __brick_is_partitioned(_ForwardIterator, _ForwardIterator, _UnaryPredicate,
+                            /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _UnaryPredicate>
+bool __brick_is_partitioned(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate,
+                            /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+bool
+__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+bool
+__pattern_is_partitioned(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                         _UnaryPredicate);
+
+//------------------------------------------------------------------------
+// partition
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _UnaryPredicate>
+_ForwardIterator __brick_partition(_ForwardIterator, _ForwardIterator, _UnaryPredicate,
+                                   /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator __brick_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate,
+                                        /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+_ForwardIterator
+__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__pattern_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                    _UnaryPredicate);
+
+//------------------------------------------------------------------------
+// stable_partition
+//------------------------------------------------------------------------
+
+template <class _BidirectionalIterator, class _UnaryPredicate>
+_BidirectionalIterator __brick_stable_partition(_BidirectionalIterator, _BidirectionalIterator, _UnaryPredicate,
+                                                /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator __brick_stable_partition(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate,
+                                               /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate>
+_BidirectionalIterator
+__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator,
+                           _UnaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__pattern_stable_partition(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                           _UnaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// partition_copy
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2, class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2>
+    __brick_partition_copy(_ForwardIterator, _ForwardIterator, _OutputIterator1, _OutputIterator2, _UnaryPredicate,
+                           /*is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator1, class _OutputIterator2, class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2> __brick_partition_copy(_RandomAccessIterator, _RandomAccessIterator,
+                                                                     _OutputIterator1, _OutputIterator2,
+                                                                     _UnaryPredicate,
+                                                                     /*is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator1, class _OutputIterator2,
+          class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2>
+__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator1,
+                         _OutputIterator2, _UnaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator1,
+          class _OutputIterator2, class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2>
+__pattern_partition_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                         _OutputIterator1, _OutputIterator2, _UnaryPredicate);
+
+//------------------------------------------------------------------------
+// sort
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsMoveConstructible>
+void
+__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare,
+               _IsMoveConstructible) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare,
+               /*is_move_constructible=*/std::true_type);
+
+//------------------------------------------------------------------------
+// stable_sort
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_stable_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                      _Compare);
+
+//------------------------------------------------------------------------
+// partial_sort
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+                       _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_partial_sort(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                       _RandomAccessIterator, _Compare);
+
+//------------------------------------------------------------------------
+// partial_sort_copy
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+_RandomAccessIterator2
+__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                            _RandomAccessIterator2, _RandomAccessIterator2, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+_RandomAccessIterator2
+__pattern_partial_sort_copy(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1,
+                            _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _Compare);
+
+//------------------------------------------------------------------------
+// adjacent_find
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _BinaryPredicate>
+_RandomAccessIterator
+__brick_adjacent_find(_RandomAccessIterator, _RandomAccessIterator, _BinaryPredicate,
+                      /* IsVector = */ std::true_type, bool) noexcept;
+
+template <class _ForwardIterator, class _BinaryPredicate>
+_ForwardIterator
+__brick_adjacent_find(_ForwardIterator, _ForwardIterator, _BinaryPredicate,
+                      /* IsVector = */ std::false_type, bool) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+_ForwardIterator
+__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _BinaryPredicate, bool) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _BinaryPredicate>
+_RandomAccessIterator
+__pattern_adjacent_find(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                        _BinaryPredicate, bool);
+
+//------------------------------------------------------------------------
+// nth_element
+//------------------------------------------------------------------------
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator,
+                      _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_nth_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                      _RandomAccessIterator, _Compare) noexcept;
+
+//------------------------------------------------------------------------
+// fill, fill_n
+//------------------------------------------------------------------------
+template <class _RandomAccessIterator, class _Tp>
+void
+__brick_fill(_RandomAccessIterator, _RandomAccessIterator, const _Tp&,
+             /* __is_vector = */ std::true_type) noexcept;
+
+template <class _ForwardIterator, class _Tp>
+void
+__brick_fill(_ForwardIterator, _ForwardIterator, const _Tp&,
+             /* __is_vector = */ std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+void
+__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, const _Tp&) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Tp>
+_RandomAccessIterator
+__pattern_fill(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, const _Tp&);
+
+template <class _RandomAccessIterator, class _Size, class _Tp>
+_RandomAccessIterator
+__brick_fill_n(_RandomAccessIterator, _Size, const _Tp&,
+               /* __is_vector = */ std::true_type) noexcept;
+
+template <class _OutputIterator, class _Size, class _Tp>
+_OutputIterator
+__brick_fill_n(_OutputIterator, _Size, const _Tp&,
+               /* __is_vector = */ std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _OutputIterator, class _Size, class _Tp>
+_OutputIterator
+__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator, _Size, const _Tp&) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Tp>
+_RandomAccessIterator
+__pattern_fill_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _Size, const _Tp&);
+
+//------------------------------------------------------------------------
+// generate, generate_n
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _Generator>
+void __brick_generate(_RandomAccessIterator, _RandomAccessIterator, _Generator,
+                      /* is_vector = */ std::true_type) noexcept;
+
+template <class _ForwardIterator, class _Generator>
+void __brick_generate(_ForwardIterator, _ForwardIterator, _Generator,
+                      /* is_vector = */ std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Generator>
+void
+__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Generator) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Generator>
+_RandomAccessIterator
+__pattern_generate(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                   _Generator);
+
+template <class _RandomAccessIterator, class Size, class _Generator>
+_RandomAccessIterator __brick_generate_n(_RandomAccessIterator, Size, _Generator,
+                                         /* is_vector = */ std::true_type) noexcept;
+
+template <class OutputIterator, class Size, class _Generator>
+OutputIterator __brick_generate_n(OutputIterator, Size, _Generator,
+                                  /* is_vector = */ std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class OutputIterator, class Size, class _Generator>
+OutputIterator
+__pattern_generate_n(_Tag, _ExecutionPolicy&&, OutputIterator, Size, _Generator) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class Size, class _Generator>
+_RandomAccessIterator
+__pattern_generate_n(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, Size, _Generator);
+
+//------------------------------------------------------------------------
+// remove
+//------------------------------------------------------------------------
+template <class _ForwardIterator, class _UnaryPredicate>
+_ForwardIterator __brick_remove_if(_ForwardIterator, _ForwardIterator, _UnaryPredicate,
+                                   /* __is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator __brick_remove_if(_RandomAccessIterator, _RandomAccessIterator, _UnaryPredicate,
+                                        /* __is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+_ForwardIterator
+__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _UnaryPredicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__pattern_remove_if(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                    _UnaryPredicate) noexcept;
+
+//------------------------------------------------------------------------
+// merge
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_merge(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                              _OutputIterator, _Compare,
+                              /* __is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_merge(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                              _RandomAccessIterator2, _OutputIterator, _Compare,
+                              /* __is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                _OutputIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _OutputIterator, class _Compare>
+_OutputIterator
+__pattern_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare);
+
+//------------------------------------------------------------------------
+// inplace_merge
+//------------------------------------------------------------------------
+
+template <class _BidirectionalIterator, class _Compare>
+void __brick_inplace_merge(_BidirectionalIterator, _BidirectionalIterator, _BidirectionalIterator, _Compare,
+                           /* __is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Compare>
+void __brick_inplace_merge(_RandomAccessIterator, _RandomAccessIterator, _RandomAccessIterator, _Compare,
+                           /* __is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator, class _Compare>
+void
+__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator, _BidirectionalIterator,
+                        _BidirectionalIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_inplace_merge(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                        _RandomAccessIterator, _Compare);
+
+//------------------------------------------------------------------------
+// includes
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+bool
+__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                   _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+bool
+__pattern_includes(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                   _RandomAccessIterator2, _RandomAccessIterator2, _Compare);
+
+//------------------------------------------------------------------------
+// set_union
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_union(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                  _OutputIterator, _Compare,
+                                  /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_union(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                  _RandomAccessIterator2, _OutputIterator, _Compare,
+                                  /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                    _ForwardIterator2, _OutputIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _OutputIterator, class _Compare>
+_OutputIterator
+__pattern_set_union(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                    _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare);
+
+//------------------------------------------------------------------------
+// set_intersection
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_intersection(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                         _OutputIterator, _Compare,
+                                         /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_intersection(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                         _RandomAccessIterator2, _OutputIterator, _Compare,
+                                         /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                           _ForwardIterator2, _OutputIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _OutputIterator, class _Compare>
+_OutputIterator
+__pattern_set_intersection(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1,
+                           _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator,
+                           _Compare);
+
+//------------------------------------------------------------------------
+// set_difference
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_difference(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                       _OutputIterator, _Compare,
+                                       /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_difference(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                       _RandomAccessIterator2, _OutputIterator, _Compare,
+                                       /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                         _ForwardIterator2, _OutputIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _OutputIterator, class _Compare>
+_OutputIterator
+__pattern_set_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                         _RandomAccessIterator2, _RandomAccessIterator2, _OutputIterator, _Compare);
+
+//------------------------------------------------------------------------
+// set_symmetric_difference
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_symmetric_difference(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                                                 _ForwardIterator2, _OutputIterator, _Compare,
+                                                 /*__is_vector=*/std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _OutputIterator, class _Compare>
+_OutputIterator __brick_set_symmetric_difference(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                                 _RandomAccessIterator2, _OutputIterator, _Compare,
+                                                 /*__is_vector=*/std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                                   _ForwardIterator2, _OutputIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _OutputIterator, class _Compare>
+_OutputIterator
+__pattern_set_symmetric_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1,
+                                   _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2,
+                                   _OutputIterator, _Compare);
+
+//------------------------------------------------------------------------
+// is_heap_until
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare,
+                                            /* __is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator __brick_is_heap_until(_RandomAccessIterator, _RandomAccessIterator, _Compare,
+                                            /* __is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__pattern_is_heap_until(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                        _Compare) noexcept;
+
+//------------------------------------------------------------------------
+// min_element
+//------------------------------------------------------------------------
+
+template <typename _ForwardIterator, typename _Compare>
+_ForwardIterator __brick_min_element(_ForwardIterator, _ForwardIterator, _Compare,
+                                     /* __is_vector = */ std::false_type) noexcept;
+
+template <typename _RandomAccessIterator, typename _Compare>
+_RandomAccessIterator __brick_min_element(_RandomAccessIterator, _RandomAccessIterator, _Compare,
+                                          /* __is_vector = */ std::true_type) noexcept;
+
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare>
+_ForwardIterator
+__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept;
+
+template <typename _IsVector, typename _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare>
+_RandomAccessIterator
+__pattern_min_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                      _Compare);
+
+//------------------------------------------------------------------------
+// minmax_element
+//------------------------------------------------------------------------
+
+template <typename _ForwardIterator, typename _Compare>
+std::pair<_ForwardIterator, _ForwardIterator> __brick_minmax_element(_ForwardIterator, _ForwardIterator, _Compare,
+                                                                     /* __is_vector = */ std::false_type) noexcept;
+
+template <typename _RandomAccessIterator, typename _Compare>
+std::pair<_RandomAccessIterator, _RandomAccessIterator>
+    __brick_minmax_element(_RandomAccessIterator, _RandomAccessIterator, _Compare,
+                           /* __is_vector = */ std::true_type) noexcept;
+
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare>
+std::pair<_ForwardIterator, _ForwardIterator>
+__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Compare) noexcept;
+
+template <typename _IsVector, typename _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare>
+std::pair<_RandomAccessIterator, _RandomAccessIterator>
+__pattern_minmax_element(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                         _Compare);
+
+//------------------------------------------------------------------------
+// mismatch
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+std::pair<_ForwardIterator1, _ForwardIterator2> __brick_mismatch(_ForwardIterator1, _ForwardIterator1,
+                                                                 _ForwardIterator2, _ForwardIterator2, _Predicate,
+                                                                 /* __is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Predicate>
+std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
+    __brick_mismatch(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2,
+                     _Predicate,
+                     /* __is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+std::pair<_ForwardIterator1, _ForwardIterator2>
+__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                   _Predicate) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Predicate>
+std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
+__pattern_mismatch(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1, _RandomAccessIterator1,
+                   _RandomAccessIterator2, _RandomAccessIterator2, _Predicate) noexcept;
+
+//------------------------------------------------------------------------
+// lexicographical_compare
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+bool __brick_lexicographical_compare(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _ForwardIterator2,
+                                     _Compare,
+                                     /* __is_vector = */ std::false_type) noexcept;
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Compare>
+bool __brick_lexicographical_compare(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2,
+                                     _RandomAccessIterator2, _Compare,
+                                     /* __is_vector = */ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+bool
+__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2,
+                                  _ForwardIterator2, _Compare) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+bool
+__pattern_lexicographical_compare(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1,
+                                  _RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator2,
+                                  _Compare) noexcept;
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_ALGORITHM_FWD_H */

diff --git a/libcxx/include/pstl/internal/algorithm_impl.h b/libcxx/include/pstl/internal/algorithm_impl.h
new file mode 100644
index 0000000000000..2b505d94611f6
--- /dev/null
+++ b/libcxx/include/pstl/internal/algorithm_impl.h
@@ -0,0 +1,3819 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_ALGORITHM_IMPL_H
+#define _PSTL_ALGORITHM_IMPL_H
+
+#include <iterator>
+#include <type_traits>
+#include <utility>
+#include <functional>
+#include <algorithm>
+
+#include "execution_impl.h"
+#include "memory_impl.h"
+#include "parallel_backend.h"
+#include "parallel_backend_utils.h"
+#include "parallel_impl.h"
+#include "pstl_config.h"
+#include "unseq_backend_simd.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+//------------------------------------------------------------------------
+// any_of
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Pred>
+bool
+__brick_any_of(const _ForwardIterator __first, const _ForwardIterator __last, _Pred __pred,
+               /*__is_vector=*/std::false_type) noexcept
+{
+    return std::any_of(__first, __last, __pred);
+};
+
+template <class _RandomAccessIterator, class _Pred>
+bool
+__brick_any_of(const _RandomAccessIterator __first, const _RandomAccessIterator __last, _Pred __pred,
+               /*__is_vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_or(__first, __last - __first, __pred);
+};
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Pred>
+bool
+__pattern_any_of(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred) noexcept
+{
+    return __internal::__brick_any_of(__first, __last, __pred, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Pred>
+bool
+__pattern_any_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                 _RandomAccessIterator __last, _Pred __pred)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __internal::__parallel_or(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                             [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j)
+                                             { return __internal::__brick_any_of(__i, __j, __pred, _IsVector{}); });
+        });
+}
+
+// [alg.foreach]
+// for_each_n with no policy
+
+template <class _ForwardIterator, class _Size, class _Function>
+_ForwardIterator
+__for_each_n_it_serial(_ForwardIterator __first, _Size __n, _Function __f)
+{
+    for (; __n > 0; ++__first, --__n)
+        __f(__first);
+    return __first;
+}
+
+//------------------------------------------------------------------------
+// walk1 (pseudo)
+//
+// walk1 evaluates f(x) for each dereferenced value x drawn from [first,last)
+//------------------------------------------------------------------------
+template <class _ForwardIterator, class _Function>
+void
+__brick_walk1(_ForwardIterator __first, _ForwardIterator __last, _Function __f, /*vector=*/std::false_type) noexcept
+{
+    std::for_each(__first, __last, __f);
+}
+
+template <class _RandomAccessIterator, class _Function>
+void
+__brick_walk1(_RandomAccessIterator __first, _RandomAccessIterator __last, _Function __f,
+              /*vector=*/std::true_type) noexcept
+{
+    __unseq_backend::__simd_walk_1(__first, __last - __first, __f);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Function>
+void
+__pattern_walk1(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Function __f) noexcept
+{
+    __internal::__brick_walk1(__first, __last, __f, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Function>
+void
+__pattern_walk1(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                _RandomAccessIterator __last, _Function __f)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                          [__f](_RandomAccessIterator __i, _RandomAccessIterator __j)
+                                          { __internal::__brick_walk1(__i, __j, __f, _IsVector{}); });
+        });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Brick>
+void
+__pattern_walk_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                     _Brick __brick) noexcept
+{
+    __brick(__first, __last);
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Brick>
+void
+__pattern_walk_brick(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                     _RandomAccessIterator __last, _Brick __brick)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                          [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j)
+                                          { __brick(__i, __j); });
+        });
+}
+
+//------------------------------------------------------------------------
+// walk1_n
+//------------------------------------------------------------------------
+template <class _ForwardIterator, class _Size, class _Function>
+_ForwardIterator
+__brick_walk1_n(_ForwardIterator __first, _Size __n, _Function __f, /*_IsVectorTag=*/std::false_type)
+{
+    return __internal::__for_each_n_it_serial(__first, __n,
+                                              [&__f](_ForwardIterator __it) { __f(*__it); }); // calling serial version
+}
+
+template <class _RandomAccessIterator, class _DifferenceType, class _Function>
+_RandomAccessIterator
+__brick_walk1_n(_RandomAccessIterator __first, _DifferenceType __n, _Function __f,
+                /*vectorTag=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_walk_1(__first, __n, __f);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Function>
+_ForwardIterator
+__pattern_walk1_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Function __f) noexcept
+{
+    return __internal::__brick_walk1_n(__first, __n, __f, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Function>
+_RandomAccessIterator
+__pattern_walk1_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first, _Size __n,
+                  _Function __f)
+{
+    __internal::__pattern_walk1(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __first + __n, __f);
+
+    return __first + __n;
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Brick>
+_ForwardIterator
+__pattern_walk_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _Size __n, _Brick __brick) noexcept
+{
+    return __brick(__first, __n);
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Brick>
+_RandomAccessIterator
+__pattern_walk_brick_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                       _Size __n, _Brick __brick)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __first + __n,
+                [__brick](_RandomAccessIterator __i, _RandomAccessIterator __j) { __brick(__i, __j - __i); });
+            return __first + __n;
+        });
+}
+
+//------------------------------------------------------------------------
+// walk2 (pseudo)
+//
+// walk2 evaluates f(x,y) for deferenced values (x,y) drawn from [first1,last1) and [first2,...)
+//------------------------------------------------------------------------
+template <class _ForwardIterator1, class _ForwardIterator2, class _Function>
+_ForwardIterator2
+__brick_walk2(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Function __f,
+              /*vector=*/std::false_type) noexcept
+{
+    for (; __first1 != __last1; ++__first1, ++__first2)
+        __f(*__first1, *__first2);
+    return __first2;
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Function>
+_RandomAccessIterator2
+__brick_walk2(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+              _Function __f,
+              /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_walk_2(__first1, __last1 - __first1, __first2, __f);
+}
+
+template <class _ForwardIterator1, class _Size, class _ForwardIterator2, class _Function>
+_ForwardIterator2
+__brick_walk2_n(_ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2, _Function __f,
+                /*vector=*/std::false_type) noexcept
+{
+    for (; __n > 0; --__n, ++__first1, ++__first2)
+        __f(*__first1, *__first2);
+    return __first2;
+}
+
+template <class _RandomAccessIterator1, class _Size, class _RandomAccessIterator2, class _Function>
+_RandomAccessIterator2
+__brick_walk2_n(_RandomAccessIterator1 __first1, _Size __n, _RandomAccessIterator2 __first2, _Function __f,
+                /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_walk_2(__first1, __n, __first2, __f);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Function>
+_ForwardIterator2
+__pattern_walk2(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                _ForwardIterator2 __first2, _Function __f) noexcept
+{
+    return __internal::__brick_walk2(__first1, __last1, __first2, __f, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Function>
+_RandomAccessIterator2
+__pattern_walk2(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Function __f)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                [__f, __first1, __first2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+                { __internal::__brick_walk2(__i, __j, __first2 + (__i - __first1), __f, _IsVector{}); });
+            return __first2 + (__last1 - __first1);
+        });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2,
+          class _Function>
+_ForwardIterator2
+__pattern_walk2_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2,
+                  _Function __f) noexcept
+{
+    return __internal::__brick_walk2_n(__first1, __n, __first2, __f, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _Size,
+          class _RandomAccessIterator2, class _Function>
+_RandomAccessIterator2
+__pattern_walk2_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                  _Size __n, _RandomAccessIterator2 __first2, _Function __f)
+{
+    return __internal::__pattern_walk2(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
+                                       __first2, __f);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Brick>
+_ForwardIterator2
+__pattern_walk2_brick(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                      _ForwardIterator2 __first2, _Brick __brick) noexcept
+{
+    return __brick(__first1, __last1, __first2);
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Brick>
+_RandomAccessIterator2
+__pattern_walk2_brick(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                      _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Brick __brick)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+                { __brick(__i, __j, __first2 + (__i - __first1)); });
+            return __first2 + (__last1 - __first1);
+        });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2,
+          class _Brick>
+_ForwardIterator2
+__pattern_walk2_brick_n(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _Size __n, _ForwardIterator2 __first2,
+                        _Brick __brick) noexcept
+{
+    return __brick(__first1, __n, __first2);
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _Size,
+          class _RandomAccessIterator2, class _Brick>
+_RandomAccessIterator2
+__pattern_walk2_brick_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                        _Size __n, _RandomAccessIterator2 __first2, _Brick __brick)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
+                [__first1, __first2, __brick](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+                { __brick(__i, __j - __i, __first2 + (__i - __first1)); });
+            return __first2 + __n;
+        });
+}
+
+//------------------------------------------------------------------------
+// walk3 (pseudo)
+//
+// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
+//------------------------------------------------------------------------
+template <class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator3, class _Function>
+_ForwardIterator3
+__brick_walk3(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+              _ForwardIterator3 __first3, _Function __f, /*vector=*/std::false_type) noexcept
+{
+    for (; __first1 != __last1; ++__first1, ++__first2, ++__first3)
+        __f(*__first1, *__first2, *__first3);
+    return __first3;
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3, class _Function>
+_RandomAccessIterator3
+__brick_walk3(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+              _RandomAccessIterator3 __first3, _Function __f, /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_walk_3(__first1, __last1 - __first1, __first2, __first3, __f);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator3,
+          class _Function>
+_ForwardIterator3
+__pattern_walk3(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                _ForwardIterator2 __first2, _ForwardIterator3 __first3, _Function __f) noexcept
+{
+    return __internal::__brick_walk3(__first1, __last1, __first2, __first3, __f, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _Function>
+_RandomAccessIterator3
+__pattern_walk3(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator3 __first3,
+                _Function __f)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_for(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                [__f, __first1, __first2, __first3](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
+                    __internal::__brick_walk3(__i, __j, __first2 + (__i - __first1), __first3 + (__i - __first1), __f,
+                                              _IsVector{});
+                });
+            return __first3 + (__last1 - __first1);
+        });
+}
+
+//------------------------------------------------------------------------
+// equal
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool
+__brick_equal(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+              _ForwardIterator2 __last2, _BinaryPredicate __p, /* IsVector = */ std::false_type) noexcept
+{
+    return std::equal(__first1, __last1, __first2, __last2, __p);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+bool
+__brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+              _RandomAccessIterator2 __last2, _BinaryPredicate __p, /* is_vector = */ std::true_type) noexcept
+{
+    if (__last1 - __first1 != __last2 - __first2)
+        return false;
+
+    return __unseq_backend::__simd_first(__first1, __last1 - __first1, __first2, std::not_fn(__p)).first == __last1;
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool
+__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __p) noexcept
+{
+    return __internal::__brick_equal(__first1, __last1, __first2, __last2, __p, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+bool
+__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                _BinaryPredicate __p)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (__last1 - __first1 != __last2 - __first2)
+        return false;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return !__internal::__parallel_or(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+                {
+                    return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1),
+                                                      __first2 + (__j - __first1), __p, _IsVector{});
+                });
+        });
+}
+
+//------------------------------------------------------------------------
+// equal version for sequences with equal length
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool
+__brick_equal(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __p,
+              /* IsVector = */ std::false_type) noexcept
+{
+    return std::equal(__first1, __last1, __first2, __p);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+bool
+__brick_equal(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+              _BinaryPredicate __p, /* is_vector = */ std::true_type) noexcept
+{
+    return __unseq_backend::__simd_first(__first1, __last1 - __first1, __first2, std::not_fn(__p)).first == __last1;
+}
+
+// Serial pattern for equal-length ranges: the execution policy is ignored and the call is forwarded
+// to the brick selected by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+bool
+__pattern_equal(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                _ForwardIterator2 __first2, _BinaryPredicate __p) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_equal(__first1, __last1, __first2, __p, __vector_tag{});
+}
+
+// Parallel overload of the equal-length "equal" pattern, selected by __parallel_tag<_IsVector>.
+// The result is the negation of __parallel_or over a callback that reports whether a sub-range
+// [__i, __j) of the first sequence differs from the matching part of the second sequence.
+// NOTE(review): __parallel_or and __except_handler are defined outside this chunk; presumably the
+// former returns true if the callback returns true for any sub-range - confirm.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+bool
+__pattern_equal(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _BinaryPredicate __p)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return !__internal::__parallel_or(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                // __first2 + (__i - __first1) is the position in the second sequence that corresponds to __i.
+                [__first1, __first2, __p](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+                { return !__internal::__brick_equal(__i, __j, __first2 + (__i - __first1), __p, _IsVector{}); });
+        });
+}
+
+//------------------------------------------------------------------------
+// find_if
+//------------------------------------------------------------------------
+// Serial brick: the search is delegated to std::find_if.
+template <class _ForwardIterator, class _Predicate>
+_ForwardIterator
+__brick_find_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
+                /*is_vector=*/std::false_type) noexcept
+{
+    _ForwardIterator __pos = std::find_if(__first, __last, __pred);
+    return __pos;
+}
+
+// Vectorized brick: forwards to __unseq_backend::__simd_first over the index range
+// [0, __last - __first); the callback applies __pred to the element at index __i.
+// NOTE(review): __simd_first is defined outside this chunk; presumably it returns the iterator to
+// the first element accepted by the callback, or the end position if none - confirm.
+template <class _RandomAccessIterator, class _Predicate>
+_RandomAccessIterator
+__brick_find_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _Predicate __pred,
+                /*is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType;
+    return __unseq_backend::__simd_first(
+        __first, _SizeType(0), __last - __first,
+        [&__pred](_RandomAccessIterator __it, _SizeType __i) { return __pred(__it[__i]); });
+}
+
+// Serial pattern: the tag object and the execution policy are not used; the call is forwarded to
+// the brick selected by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+_ForwardIterator
+__pattern_find_if(_Tag __tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                  _Predicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_find_if(__first, __last, __pred, __vector_tag{});
+}
+
+// Parallel pattern: runs __parallel_find over [__first, __last) with a callback that applies
+// __brick_find_if to each sub-range [__i, __j). std::less and is_first == true are passed to the
+// helper.
+// NOTE(review): __parallel_find and __except_handler are defined outside this chunk; presumably
+// this combination selects the match closest to __first - confirm.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Predicate>
+_RandomAccessIterator
+__pattern_find_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                  _RandomAccessIterator __last, _Predicate __pred)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __internal::__parallel_find(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [__pred](_RandomAccessIterator __i, _RandomAccessIterator __j)
+                { return __internal::__brick_find_if(__i, __j, __pred, _IsVector{}); },
+                std::less<typename std::iterator_traits<_RandomAccessIterator>::difference_type>(),
+                /*is_first=*/true);
+        });
+}
+
+//------------------------------------------------------------------------
+// find_end
+//------------------------------------------------------------------------
+
+// Looks for an occurrence of the subsequence [s_first, s_last) whose start lies in [first, last).
+// A candidate is only accepted when at least (s_last - s_first) elements remain before global_last,
+// so a match may extend past last but never past global_last.
+// b_first determines which occurrence is reported: the first one (true) or the last one (false).
+// Returns last when no occurrence is found. For an empty subsequence the result is first when
+// b_first is true and last otherwise.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate, class _IsVector>
+_RandomAccessIterator1
+__find_subrange(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator1 __global_last,
+                _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last, _BinaryPredicate __pred,
+                bool __b_first, _IsVector __is_vector) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::value_type _ValueType;
+    auto __n2 = __s_last - __s_first;
+    if (__n2 < 1)
+    {
+        return __b_first ? __first : __last;
+    }
+
+    auto __n1 = __global_last - __first;
+    if (__n1 < __n2)
+    {
+        return __last;
+    }
+
+    // Start of the most recent full match; stays at last until one is found.
+    auto __cur = __last;
+    while (__first != __last && (__global_last - __first >= __n2))
+    {
+        // find position of *s_first in [first, last) (it can be start of subsequence)
+        __first = __internal::__brick_find_if(
+            __first, __last, __equal_value_by_pred<_ValueType, _BinaryPredicate>(*__s_first, __pred), __is_vector);
+
+        // no further candidate start in [first, last)
+        if (__first == __last)
+        {
+            break;
+        }
+
+        // Compare the rest of the candidate against the rest of the subsequence. The searched
+        // range is passed as the first range so that the predicate is invoked as
+        // pred(element of the searched range, element of the subsequence), which is the argument
+        // order std::search / std::find_end specify. The bound check guarantees that
+        // __first + __n2 does not go past __global_last.
+        if ((__global_last - __first >= __n2) &&
+            __internal::__brick_equal(__first + 1, __first + __n2, __s_first + 1, __pred, __is_vector))
+        {
+            if (__b_first)
+            {
+                return __first;
+            }
+            // b_first == false: remember this occurrence and keep looking for a later one
+            __cur = __first;
+        }
+
+        // move past the current candidate and try the next start position
+        ++__first;
+    }
+    return __cur;
+}
+
+// Looks for a run of __count consecutive elements matching __value (via __pred) whose start lies
+// in [__first, __last); a run may extend past __last but never past __global_last.
+// Returns __last when no such run is found, when __count < 1, or when fewer than __count elements
+// remain before __global_last.
+template <class _RandomAccessIterator, class _Size, class _Tp, class _BinaryPredicate, class _IsVector>
+_RandomAccessIterator
+__find_subrange(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __global_last,
+                _Size __count, const _Tp& __value, _BinaryPredicate __pred, _IsVector __is_vector) noexcept
+{
+    if (static_cast<_Size>(__global_last - __first) < __count || __count < 1)
+    {
+        return __last; // According to the standard last shall be returned when count < 1
+    }
+
+    auto __matches_value = __equal_value_by_pred<_Tp, _BinaryPredicate>(__value, __pred);
+    while (__first != __last && (static_cast<_Size>(__global_last - __first) >= __count))
+    {
+        // jump to the next element that matches the value
+        __first = __internal::__brick_find_if(__first, __last, __matches_value, __is_vector);
+        if (__first == __last)
+        {
+            break;
+        }
+
+        // accept the candidate when enough elements remain and none of [first+1, first+count)
+        // fails the match
+        if ((static_cast<_Size>(__global_last - __first) >= __count) &&
+            !__internal::__brick_any_of(__first + 1, __first + __count, std::not_fn(__matches_value), __is_vector))
+        {
+            return __first;
+        }
+
+        ++__first;
+    }
+    return __last;
+}
+
+// Serial brick: the search is delegated to std::find_end.
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__brick_find_end(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+                 _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::false_type) noexcept
+{
+    _ForwardIterator1 __pos = std::find_end(__first, __last, __s_first, __s_last, __pred);
+    return __pos;
+}
+
+// Vectorized brick: searches for the last occurrence (b_first == false) of [__s_first, __s_last)
+// in [__first, __last) via __find_subrange, using __last as the global end as well.
+// The helper is called with its __internal:: qualification, as the other call sites in this file
+// do, so that argument-dependent lookup on the iterator types cannot select another function.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1
+__brick_find_end(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first,
+                 _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::true_type) noexcept
+{
+    return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, false,
+                                       std::true_type());
+}
+
+// Serial pattern: the execution policy is ignored and the call is forwarded to the brick selected
+// by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__pattern_find_end(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                   _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_find_end(__first, __last, __s_first, __s_last, __pred, __vector_tag{});
+}
+
+// Parallel pattern for find_end.
+// When both ranges have the same length the only possible occurrence starts at __first, so the
+// question reduces to __pattern_equal. Otherwise __parallel_find is run with a callback that
+// searches each sub-range [__i, __j) for the last occurrence (b_first == false), allowing a match
+// to extend up to __last; std::greater and is_first == false are passed to the helper.
+// NOTE(review): this overload is declared noexcept while the parallel __pattern_find_if and
+// __pattern_equal overloads in this file are not - confirm this is intended.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+_RandomAccessIterator1
+__pattern_find_end(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                   _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last,
+                   _BinaryPredicate __pred) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (__last - __first == __s_last - __s_first)
+    {
+        const bool __res = __internal::__pattern_equal(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                       __s_first, __pred);
+        return __res ? __first : __last;
+    }
+    else
+    {
+        return __internal::__except_handler(
+            [&]()
+            {
+                return __internal::__parallel_find(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                    [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
+                        return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, false,
+                                                           _IsVector{});
+                    },
+                    std::greater<typename std::iterator_traits<_RandomAccessIterator1>::difference_type>(),
+                    /*is_first=*/false);
+            });
+    }
+}
+
+//------------------------------------------------------------------------
+// find_first_of
+//------------------------------------------------------------------------
+// Serial brick: the search is delegated to std::find_first_of.
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__brick_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+                      _ForwardIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::false_type) noexcept
+{
+    _ForwardIterator1 __pos = std::find_first_of(__first, __last, __s_first, __s_last, __pred);
+    return __pos;
+}
+
+// Vectorized brick: the search is delegated to __unseq_backend::__simd_find_first_of
+// (defined outside this chunk).
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1
+__brick_find_first_of(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first,
+                      _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*__is_vector=*/std::true_type) noexcept
+{
+    _RandomAccessIterator1 __pos = __unseq_backend::__simd_find_first_of(__first, __last, __s_first, __s_last, __pred);
+    return __pos;
+}
+
+// Serial pattern: the execution policy is ignored and the call is forwarded to the brick selected
+// by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__pattern_find_first_of(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                        _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_find_first_of(__first, __last, __s_first, __s_last, __pred, __vector_tag{});
+}
+
+// Parallel pattern for find_first_of: runs __parallel_find over [__first, __last) with a callback
+// that applies __brick_find_first_of to each sub-range [__i, __j); std::less and is_first == true
+// are passed to the helper.
+// NOTE(review): __parallel_find and __except_handler are defined outside this chunk - semantics
+// of the comparator / is_first pair to be confirmed there.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+_RandomAccessIterator1
+__pattern_find_first_of(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                        _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first,
+                        _RandomAccessIterator2 __s_last, _BinaryPredicate __pred) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __internal::__parallel_find(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [__s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+                { return __internal::__brick_find_first_of(__i, __j, __s_first, __s_last, __pred, _IsVector{}); },
+                std::less<typename std::iterator_traits<_RandomAccessIterator1>::difference_type>(), /*is_first=*/true);
+        });
+}
+
+//------------------------------------------------------------------------
+// search
+//------------------------------------------------------------------------
+// Serial brick: the search is delegated to std::search.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1
+__brick_search(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first,
+               _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept
+{
+    _RandomAccessIterator1 __pos = std::search(__first, __last, __s_first, __s_last, __pred);
+    return __pos;
+}
+
+// Vectorized brick: searches for the first occurrence (b_first == true) of [__s_first, __s_last)
+// in [__first, __last) via __find_subrange, using __last as the global end as well.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate>
+_RandomAccessIterator1
+__brick_search(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first,
+               _RandomAccessIterator2 __s_last, _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
+{
+    const bool __want_first = true;
+    return __internal::__find_subrange(__first, __last, __last, __s_first, __s_last, __pred, __want_first,
+                                       std::true_type());
+}
+
+// Serial pattern: the execution policy is ignored and the call is forwarded to the brick selected
+// by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__pattern_search(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                 _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_search(__first, __last, __s_first, __s_last, __pred, __vector_tag{});
+}
+
+// Parallel pattern for search.
+// When both ranges have the same length the only possible occurrence starts at __first, so the
+// question reduces to __pattern_equal. Otherwise __parallel_find is run with a callback that
+// searches each sub-range [__i, __j) for the first occurrence (b_first == true), allowing a match
+// to extend up to __last; std::less and is_first == true are passed to the helper.
+// NOTE(review): __parallel_find and __except_handler are defined outside this chunk.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate>
+_RandomAccessIterator1
+__pattern_search(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                 _RandomAccessIterator1 __last, _RandomAccessIterator2 __s_first, _RandomAccessIterator2 __s_last,
+                 _BinaryPredicate __pred) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (__last - __first == __s_last - __s_first)
+    {
+        const bool __res = __internal::__pattern_equal(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                       __s_first, __pred);
+        return __res ? __first : __last;
+    }
+    else
+    {
+        return __internal::__except_handler(
+            [&]()
+            {
+                return __internal::__parallel_find(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                    [__last, __s_first, __s_last, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j) {
+                        return __internal::__find_subrange(__i, __j, __last, __s_first, __s_last, __pred, true,
+                                                           _IsVector{});
+                    },
+                    std::less<typename std::iterator_traits<_RandomAccessIterator1>::difference_type>(),
+                    /*is_first=*/true);
+            });
+    }
+}
+
+//------------------------------------------------------------------------
+// search_n
+//------------------------------------------------------------------------
+// Serial brick: the search is delegated to std::search_n.
+template <class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+_ForwardIterator
+__brick_search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value,
+                 _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept
+{
+    _ForwardIterator __pos = std::search_n(__first, __last, __count, __value, __pred);
+    return __pos;
+}
+
+// Vectorized brick: searches [__first, __last) for a run of __count elements matching __value via
+// the count/value overload of __find_subrange, using __last as the global end as well.
+template <class _RandomAccessIterator, class _Size, class _Tp, class _BinaryPredicate>
+_RandomAccessIterator
+__brick_search_n(_RandomAccessIterator __first, _RandomAccessIterator __last, _Size __count, const _Tp& __value,
+                 _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
+{
+    _RandomAccessIterator __pos =
+        __internal::__find_subrange(__first, __last, __last, __count, __value, __pred, std::true_type());
+    return __pos;
+}
+
+// Serial pattern: the execution policy is ignored and the call is forwarded to the brick selected
+// by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+_ForwardIterator
+__pattern_search_n(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+                   const _Tp& __value, _BinaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_search_n(__first, __last, __count, __value, __pred, __vector_tag{});
+}
+
+// Parallel pattern for search_n.
+// When the range length equals __count the only possible run starts at __first, so the result is
+// __first if no element fails __pred(element, __value) and __last otherwise. In every other case
+// __parallel_find is run with a callback that applies the count/value overload of __find_subrange
+// to each sub-range [__i, __j), allowing a run to extend up to __last.
+// NOTE(review): the equal-length callback takes its argument as const _Tp&, so elements of another
+// type are converted to _Tp before __pred sees them - confirm this is intended.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Tp,
+          class _BinaryPredicate>
+_RandomAccessIterator
+__pattern_search_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                   _RandomAccessIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (static_cast<_Size>(__last - __first) == __count)
+    {
+        const bool __result =
+            !__internal::__pattern_any_of(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                          [&__value, &__pred](const _Tp& __val) { return !__pred(__val, __value); });
+        return __result ? __first : __last;
+    }
+    else
+    {
+        return __internal::__except_handler(
+            [&__exec, __first, __last, __count, &__value, __pred]()
+            {
+                return __internal::__parallel_find(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                    [__last, __count, &__value, __pred](_RandomAccessIterator __i, _RandomAccessIterator __j)
+                    { return __internal::__find_subrange(__i, __j, __last, __count, __value, __pred, _IsVector{}); },
+                    std::less<typename std::iterator_traits<_RandomAccessIterator>::difference_type>(),
+                    /*is_first=*/true);
+            });
+    }
+}
+
+//------------------------------------------------------------------------
+// copy_n
+//------------------------------------------------------------------------
+
+// Serial brick: the copy is delegated to std::copy_n.
+template <class _ForwardIterator, class _Size, class _OutputIterator>
+_OutputIterator
+__brick_copy_n(_ForwardIterator __first, _Size __n, _OutputIterator __result, /*vector=*/std::false_type) noexcept
+{
+    _OutputIterator __out_end = std::copy_n(__first, __n, __result);
+    return __out_end;
+}
+
+// Vectorized brick: forwards to __unseq_backend::__simd_assign (defined outside this chunk) with a
+// callback that copy-assigns the source element to the destination element.
+template <class _RandomAccessIterator1, class _Size, class _RandomAccessIterator2>
+_RandomAccessIterator2
+__brick_copy_n(_RandomAccessIterator1 __first, _Size __n, _RandomAccessIterator2 __result,
+               /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_assign(
+        __first, __n, __result,
+        [](_RandomAccessIterator1 __src, _RandomAccessIterator2 __dst) { *__dst = *__src; });
+}
+
+//------------------------------------------------------------------------
+// copy
+//------------------------------------------------------------------------
+// Serial brick: the copy is delegated to std::copy.
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator
+__brick_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+             /*vector=*/std::false_type) noexcept
+{
+    _OutputIterator __out_end = std::copy(__first, __last, __result);
+    return __out_end;
+}
+
+// Vectorized brick: forwards to __unseq_backend::__simd_assign (defined outside this chunk) over
+// __last - __first elements with a callback that copy-assigns source to destination.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2>
+_RandomAccessIterator2
+__brick_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+             /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_assign(
+        __first, __last - __first, __result,
+        [](_RandomAccessIterator1 __src, _RandomAccessIterator2 __dst) { *__dst = *__src; });
+}
+
+//------------------------------------------------------------------------
+// move
+//------------------------------------------------------------------------
+// Serial brick: the element-wise move is delegated to the three-argument std::move algorithm.
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator
+__brick_move(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+             /*vector=*/std::false_type) noexcept
+{
+    _OutputIterator __out_end = std::move(__first, __last, __result);
+    return __out_end;
+}
+
+// Vectorized brick: forwards to __unseq_backend::__simd_assign (defined outside this chunk) over
+// __last - __first elements with a callback that move-assigns source to destination.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2>
+_RandomAccessIterator2
+__brick_move(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+             /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_assign(
+        __first, __last - __first, __result,
+        [](_RandomAccessIterator1 __src, _RandomAccessIterator2 __dst) { *__dst = std::move(*__src); });
+}
+
+// Function object that move-assigns every element of [__first, __last) to the range starting at
+// __result and then explicitly runs the destructor of the source element. The last argument
+// selects the vectorized (std::true_type) or the plain loop (std::false_type) implementation.
+struct __brick_move_destroy
+{
+    // Vectorized overload: the per-element work is handed to __unseq_backend::__simd_assign
+    // (defined outside this chunk).
+    template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+    _RandomAccessIterator2
+    operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+               /*vec*/ std::true_type) const
+    {
+        using _IteratorValueType = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
+
+        return __unseq_backend::__simd_assign(__first, __last - __first, __result,
+                                              [](_RandomAccessIterator1 __src, _RandomAccessIterator2 __dst) {
+                                                  *__dst = std::move(*__src);
+                                                  (*__src).~_IteratorValueType();
+                                              });
+    }
+
+    // Serial overload: returns the destination position one past the last element written.
+    template <typename _RandomAccessIterator1, typename _RandomAccessIterator2>
+    _RandomAccessIterator2
+    operator()(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+               /*vec*/ std::false_type) const
+    {
+        using _IteratorValueType = typename std::iterator_traits<_RandomAccessIterator1>::value_type;
+
+        while (__first != __last)
+        {
+            *__result = std::move(*__first);
+            (*__first).~_IteratorValueType();
+            ++__first;
+            ++__result;
+        }
+        return __result;
+    }
+};
+
+//------------------------------------------------------------------------
+// swap_ranges
+//------------------------------------------------------------------------
+// Serial brick: the swap is delegated to std::swap_ranges.
+template <class _ForwardIterator, class _OutputIterator>
+_OutputIterator
+__brick_swap_ranges(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+                    /*vector=*/std::false_type) noexcept
+{
+    _OutputIterator __out_end = std::swap_ranges(__first, __last, __result);
+    return __out_end;
+}
+
+// Vectorized brick: forwards to __unseq_backend::__simd_assign (defined outside this chunk) over
+// __last - __first elements, using iter_swap instantiated for the two iterator types as the
+// per-element operation.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2>
+_RandomAccessIterator2
+__brick_swap_ranges(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+                    /*vector=*/std::true_type) noexcept
+{
+    using std::iter_swap;
+    return __unseq_backend::__simd_assign(
+        __first, __last - __first, __result, iter_swap<_RandomAccessIterator1, _RandomAccessIterator2>);
+}
+
+//------------------------------------------------------------------------
+// copy_if
+//------------------------------------------------------------------------
+// Serial brick: the filtered copy is delegated to std::copy_if.
+template <class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
+_OutputIterator
+__brick_copy_if(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, _UnaryPredicate __pred,
+                /*vector=*/std::false_type) noexcept
+{
+    _OutputIterator __out_end = std::copy_if(__first, __last, __result, __pred);
+    return __out_end;
+}
+
+// Vectorized brick: when _PSTL_MONOTONIC_PRESENT is defined the work goes to
+// __unseq_backend::__simd_copy_if (defined outside this chunk); otherwise it falls back to
+// std::copy_if.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _UnaryPredicate>
+_RandomAccessIterator2
+__brick_copy_if(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+                _UnaryPredicate __pred,
+                /*vector=*/std::true_type) noexcept
+{
+#if defined(_PSTL_MONOTONIC_PRESENT)
+    _RandomAccessIterator2 __out_end = __unseq_backend::__simd_copy_if(__first, __last - __first, __result, __pred);
+#else
+    _RandomAccessIterator2 __out_end = std::copy_if(__first, __last, __result, __pred);
+#endif
+    return __out_end;
+}
+
+// TODO: Try to use transform_reduce for combining __brick_copy_if_phase1 on IsVector.
+// Serial brick: stores __pred(element) into __mask for every element of [__first, __last) and
+// returns the pair (number of true entries, number of false entries).
+template <class _DifferenceType, class _ForwardIterator, class _UnaryPredicate>
+std::pair<_DifferenceType, _DifferenceType>
+__brick_calc_mask_1(_ForwardIterator __first, _ForwardIterator __last, bool* __restrict __mask, _UnaryPredicate __pred,
+                    /*vector=*/std::false_type) noexcept
+{
+    auto __count_true = _DifferenceType(0);
+    // The length is taken before the loop advances __first.
+    auto __size = __last - __first;
+
+    static_assert(__are_random_access_iterators<_ForwardIterator>::value,
+                  "Pattern-brick error. Should be a random access iterator.");
+
+    while (__first != __last)
+    {
+        *__mask = __pred(*__first);
+        if (*__mask)
+        {
+            ++__count_true;
+        }
+        ++__first;
+        ++__mask;
+    }
+    return std::make_pair(__count_true, __size - __count_true);
+}
+
+// Vectorized brick: the mask is filled by __unseq_backend::__simd_calc_mask_1 (defined outside
+// this chunk); its return value is used as the first member of the result and the range length
+// minus that value as the second.
+template <class _DifferenceType, class _RandomAccessIterator, class _UnaryPredicate>
+std::pair<_DifferenceType, _DifferenceType>
+__brick_calc_mask_1(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __mask, _UnaryPredicate __pred,
+                    /*vector=*/std::true_type) noexcept
+{
+    auto __count_true = __unseq_backend::__simd_calc_mask_1(__first, __last - __first, __mask, __pred);
+    auto __count_false = (__last - __first) - __count_true;
+    return std::make_pair(__count_true, __count_false);
+}
+
+// Serial brick: walks [__first, __last) together with __mask; for every true mask entry it calls
+// __assigner(source iterator, destination iterator) and advances the destination.
+template <class _ForwardIterator, class _OutputIterator, class _Assigner>
+void
+__brick_copy_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result, bool* __mask,
+                     _Assigner __assigner, /*vector=*/std::false_type) noexcept
+{
+    while (__first != __last)
+    {
+        if (*__mask)
+        {
+            __assigner(__first, __result);
+            ++__result;
+        }
+        ++__first;
+        ++__mask;
+    }
+}
+
+// Vectorized brick: when _PSTL_MONOTONIC_PRESENT is defined the work goes to
+// __unseq_backend::__simd_copy_by_mask (defined outside this chunk); otherwise the serial
+// (std::false_type) brick is used.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Assigner>
+void
+__brick_copy_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+                     bool* __restrict __mask, _Assigner __assigner, /*vector=*/std::true_type) noexcept
+{
+#if defined(_PSTL_MONOTONIC_PRESENT)
+    const auto __n = __last - __first;
+    __unseq_backend::__simd_copy_by_mask(__first, __n, __result, __mask, __assigner);
+#else
+    using __serial_tag = std::false_type;
+    __internal::__brick_copy_by_mask(__first, __last, __result, __mask, __assigner, __serial_tag());
+#endif
+}
+
+// Serial brick: walks [__first, __last) together with __mask and copies each element to
+// __out_true when its mask entry is true and to __out_false otherwise, advancing only the
+// destination that was written.
+template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2>
+void
+__brick_partition_by_mask(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true,
+                          _OutputIterator2 __out_false, bool* __mask, /*vector=*/std::false_type) noexcept
+{
+    while (__first != __last)
+    {
+        if (*__mask)
+        {
+            *__out_true = *__first;
+            ++__out_true;
+        }
+        else
+        {
+            *__out_false = *__first;
+            ++__out_false;
+        }
+        ++__first;
+        ++__mask;
+    }
+}
+
+// Vectorized brick: when _PSTL_MONOTONIC_PRESENT is defined the work goes to
+// __unseq_backend::__simd_partition_by_mask (defined outside this chunk); otherwise the serial
+// (std::false_type) brick is used.
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3>
+void
+__brick_partition_by_mask(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last,
+                          _RandomAccessIterator2 __out_true, _RandomAccessIterator3 __out_false, bool* __mask,
+                          /*vector=*/std::true_type) noexcept
+{
+#if defined(_PSTL_MONOTONIC_PRESENT)
+    const auto __n = __last - __first;
+    __unseq_backend::__simd_partition_by_mask(__first, __n, __out_true, __out_false, __mask);
+#else
+    using __serial_tag = std::false_type;
+    __internal::__brick_partition_by_mask(__first, __last, __out_true, __out_false, __mask, __serial_tag());
+#endif
+}
+
+// Serial pattern: the execution policy is ignored and the call is forwarded to the brick selected
+// by the tag's __is_vector member type.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryPredicate>
+_OutputIterator
+__pattern_copy_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+                  _UnaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_copy_if(__first, __last, __result, __pred, __vector_tag{});
+}
+
+// Parallel pattern for copy_if.
+// For ranges of more than one element a bool mask buffer of __n entries is allocated and
+// __parallel_strict_scan is driven with three callbacks:
+//   - Reduce: fills the mask for a chunk and yields the number of accepted elements in it,
+//   - Combine: std::plus on those counts,
+//   - Scan: copies the accepted elements of a chunk to __result + __initial,
+// plus an apex callback that stores the total in __m. The returned iterator is __result + __m.
+// Ranges of zero or one element use the serial brick directly.
+// NOTE(review): __parallel_strict_scan, __buffer and __except_handler are defined outside this
+// chunk; presumably __initial is the count of accepted elements preceding the chunk - confirm.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _UnaryPredicate>
+_RandomAccessIterator2
+__pattern_copy_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                  _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _UnaryPredicate __pred)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
+    const _DifferenceType __n = __last - __first;
+    if (_DifferenceType(1) < __n)
+    {
+        __par_backend::__buffer<bool> __mask_buf(__n);
+        return __internal::__except_handler(
+            [&__exec, __n, __first, __result, __pred, &__mask_buf]()
+            {
+                bool* __mask = __mask_buf.get();
+                _DifferenceType __m{};
+                __par_backend::__parallel_strict_scan(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
+                    [=](_DifferenceType __i, _DifferenceType __len) { // Reduce
+                        return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
+                                                                                __mask + __i, __pred, _IsVector{})
+                            .first;
+                    },
+                    std::plus<_DifferenceType>(),                                                // Combine
+                    [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
+                        __internal::__brick_copy_by_mask(
+                            __first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
+                            [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; }, _IsVector{});
+                    },
+                    [&__m](_DifferenceType __total) { __m = __total; });
+                return __result + __m;
+            });
+    }
+    // trivial sequence - use serial algorithm
+    return __internal::__brick_copy_if(__first, __last, __result, __pred, _IsVector{});
+}
+
+//------------------------------------------------------------------------
+// count
+//------------------------------------------------------------------------
+// Vectorized brick: the counting is delegated to __unseq_backend::__simd_count (defined outside
+// this chunk) over __last - __first elements.
+template <class _RandomAccessIterator, class _Predicate>
+typename std::iterator_traits<_RandomAccessIterator>::difference_type
+__brick_count(_RandomAccessIterator __first, _RandomAccessIterator __last, _Predicate __pred,
+              /* is_vector = */ std::true_type) noexcept
+{
+    return __unseq_backend::__simd_count(__first, __last - __first, __pred);
+}
+
+// Serial brick: the counting is delegated to std::count_if.
+template <class _ForwardIterator, class _Predicate>
+typename std::iterator_traits<_ForwardIterator>::difference_type
+__brick_count(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred,
+              /* is_vector = */ std::false_type) noexcept
+{
+    return std::count_if(__first, __last, __pred);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Predicate> // Generic-tag count pattern.
+typename std::iterator_traits<_ForwardIterator>::difference_type
+__pattern_count(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) noexcept
+{
+    return __internal::__brick_count(__first, __last, __pred, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Predicate> // Parallel-tag count pattern.
+typename std::iterator_traits<_RandomAccessIterator>::difference_type
+__pattern_count(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                _RandomAccessIterator __last, _Predicate __pred)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType;
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __par_backend::__parallel_reduce(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, _SizeType(0), // 0 is the initial count
+                [__pred](_RandomAccessIterator __begin, _RandomAccessIterator __end, _SizeType __value) -> _SizeType
+                { return __value + __internal::__brick_count(__begin, __end, __pred, _IsVector{}); }, // add this sub-range's count
+                std::plus<_SizeType>()); // partial counts are combined by addition
+        });
+}
+
+//------------------------------------------------------------------------
+// unique
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _BinaryPredicate> // Serial overload of the unique brick.
+_RandomAccessIterator
+__brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred,
+               /*is_vector=*/std::false_type) noexcept
+{
+    return std::unique(__first, __last, __pred); // plain forwarding; returns std::unique's result
+}
+
+template <class _RandomAccessIterator, class _BinaryPredicate> // Vector-tagged overload of the unique brick.
+_RandomAccessIterator
+__brick_unique(_RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred,
+               /*is_vector=*/std::true_type) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial"); // presumably a compile-time notice
+    return std::unique(__first, __last, __pred); // same call as the std::false_type overload
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate> // Generic-tag unique pattern.
+_ForwardIterator
+__pattern_unique(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                 _BinaryPredicate __pred) noexcept
+{
+    return __internal::__brick_unique(__first, __last, __pred, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+// This function is shared between two algorithms - remove_if (__pattern_remove_if) and unique (__pattern_unique). But the mask calculation is different,
+// so the caller passes a _CalcMask brick into __remove_elements.
+template <class _IsVector, class _ExecutionPolicy, class _ForwardIterator, class _CalcMask> // Mask-driven in-place compaction.
+_ForwardIterator
+__remove_elements(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator __first,
+                  _ForwardIterator __last, _CalcMask __calc_mask)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_ForwardIterator>::difference_type _DifferenceType;
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _Tp;
+    _DifferenceType __n = __last - __first;
+    __par_backend::__buffer<bool> __mask_buf(__n); // one flag per input element
+    // 1. find a first iterator that should be removed
+    return __internal::__except_handler([&]() {
+        bool* __mask = __mask_buf.get();
+        _DifferenceType __min = __par_backend::__parallel_reduce(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), _DifferenceType(0), __n, __n, // __n as initial value means "nothing found"
+            [__first, __mask, &__calc_mask](_DifferenceType __i, _DifferenceType __j,
+                                            _DifferenceType __local_min) -> _DifferenceType
+            {
+                // Create mask
+                __calc_mask(__mask + __i, __mask + __j, __first + __i);
+
+                // if minimum was found in a previous range we shouldn't do anymore
+                if (__local_min < __i)
+                {
+                    return __local_min;
+                }
+                // find first iterator that should be removed
+                bool* __result = __internal::__brick_find_if(
+                    __mask + __i, __mask + __j, [](bool __val) { return !__val; }, _IsVector{}); // looks for the first false flag
+                if (__result - __mask == __j) // no false flag in [__i, __j)
+                {
+                    return __local_min;
+                }
+                return std::min(__local_min, _DifferenceType(__result - __mask));
+            },
+            [](_DifferenceType __local_min1, _DifferenceType __local_min2) -> _DifferenceType
+            { return std::min(__local_min1, __local_min2); });
+
+        // No elements to remove - exit
+        if (__min == __n)
+        {
+            return __last;
+        }
+        __n -= __min; // the first __min elements stay where they are
+        __first += __min;
+
+        __par_backend::__buffer<_Tp> __buf(__n); // scratch storage for the remaining __n elements
+        _Tp* __result = __buf.get();
+        __mask += __min;
+        _DifferenceType __m{};
+        // 2. Elements that don't satisfy pred are moved to result
+        __par_backend::__parallel_strict_scan(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
+            [__mask](_DifferenceType __i, _DifferenceType __len)
+            {
+                return __internal::__brick_count(
+                    __mask + __i, __mask + __i + __len, [](bool __val) { return __val; }, _IsVector{}); // number of true flags in the chunk
+            },
+            std::plus<_DifferenceType>(),
+            [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial)
+            {
+                __internal::__brick_copy_by_mask(
+                    __first + __i, __first + __i + __len, __result + __initial, __mask + __i,
+                    [](_ForwardIterator __x, _Tp* __z)
+                    {
+                        __internal::__invoke_if_else(
+                            std::is_trivial<_Tp>(), [&]() { *__z = std::move(*__x); }, // trivial types: plain assignment
+                            [&]() { ::new (std::addressof(*__z)) _Tp(std::move(*__x)); }); // otherwise move-construct in the raw buffer
+                    },
+                    _IsVector{});
+            },
+            [&__m](_DifferenceType __total) { __m = __total; }); // stores the total reported by the scan
+
+        // 3. Elements from result are moved to [first, last)
+        __par_backend::__parallel_for(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __result, __result + __m,
+            [__result, __first](_Tp* __i, _Tp* __j)
+            {
+                __invoke_if_else(
+                    std::is_trivial<_Tp>(), [&]() { __brick_move(__i, __j, __first + (__i - __result), _IsVector{}); },
+                    [&]() { __brick_move_destroy()(__i, __j, __first + (__i - __result), _IsVector{}); });
+            });
+        return __first + __m; // __first was already advanced by __min above
+    });
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _BinaryPredicate> // Parallel-tag unique pattern.
+_RandomAccessIterator
+__pattern_unique(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                 _RandomAccessIterator __last, _BinaryPredicate __pred) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType;
+
+    if (__first == __last) // empty range
+    {
+        return __last;
+    }
+    if (__first + 1 == __last || __first + 2 == __last) // one or two elements
+    {
+        // Trivial sequence - use serial algorithm
+        return __internal::__brick_unique(__first, __last, __pred, _IsVector{});
+    }
+    return __internal::__remove_elements(
+        __tag, std::forward<_ExecutionPolicy>(__exec), ++__first, __last, // processing starts at the second element
+        [&__pred](bool* __b, bool* __e, _RandomAccessIterator __it)
+        {
+            __internal::__brick_walk3(
+                __b, __e, __it - 1, __it, // mask range, then the sequence shifted by one and the sequence itself
+                [&__pred](bool& __x, _ReferenceType __y, _ReferenceType __z) { __x = !__pred(__y, __z); }, _IsVector{}); // flag is true when __pred(__y, __z) is false
+        });
+}
+
+//------------------------------------------------------------------------
+// unique_copy
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class OutputIterator, class _BinaryPredicate> // Serial overload of the unique_copy brick.
+OutputIterator
+__brick_unique_copy(_ForwardIterator __first, _ForwardIterator __last, OutputIterator __result, _BinaryPredicate __pred,
+                    /*vector=*/std::false_type) noexcept
+{
+    return std::unique_copy(__first, __last, __result, __pred); // plain forwarding; returns std::unique_copy's result
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _BinaryPredicate> // Vector-tagged unique_copy brick.
+_RandomAccessIterator2
+__brick_unique_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result,
+                    _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
+{
+#if defined(_PSTL_MONOTONIC_PRESENT) // the SIMD backend is used only when this macro is defined
+    return __unseq_backend::__simd_unique_copy(__first, __last - __first, __result, __pred); // backend is given begin + length
+#else // otherwise same call as the std::false_type overload
+    return std::unique_copy(__first, __last, __result, __pred);
+#endif
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryPredicate> // Generic-tag pattern.
+_OutputIterator
+__pattern_unique_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                      _OutputIterator __result, _BinaryPredicate __pred) noexcept
+{
+    return __internal::__brick_unique_copy(__first, __last, __result, __pred, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate> // Serial mask brick; returns the number of true flags.
+_DifferenceType
+__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
+                    _BinaryPredicate __pred, /*vector=*/std::false_type) noexcept
+{
+    _DifferenceType __count = 0;
+    for (; __first != __last; ++__first, ++__mask)
+    {
+        *__mask = !__pred(*__first, *(__first - 1)); // reads the element before __first, so the caller must make __first - 1 valid
+        __count += *__mask; // bool converts to 0 or 1
+    }
+    return __count;
+}
+
+template <class _DifferenceType, class _RandomAccessIterator, class _BinaryPredicate> // Vector-tagged overload of the mask brick.
+_DifferenceType
+__brick_calc_mask_2(_RandomAccessIterator __first, _RandomAccessIterator __last, bool* __restrict __mask,
+                    _BinaryPredicate __pred, /*vector=*/std::true_type) noexcept
+{
+    return __unseq_backend::__simd_calc_mask_2(__first, __last - __first, __mask, __pred); // backend is given begin + length
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryPredicate> // Parallel-tag unique_copy pattern.
+_RandomAccessIterator2
+__pattern_unique_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                      _RandomAccessIterator1 __last, _RandomAccessIterator2 __result, _BinaryPredicate __pred)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
+    const _DifferenceType __n = __last - __first;
+    if (_DifferenceType(2) < __n) // the scan-based path is taken only for more than two elements
+    {
+        __par_backend::__buffer<bool> __mask_buf(__n); // one flag per input element
+        if (_DifferenceType(2) < __n) // NOTE(review): repeats the outer condition, so it is always true here
+        {
+            return __internal::__except_handler(
+                [&__exec, __n, __first, __result, __pred, &__mask_buf]()
+                {
+                    bool* __mask = __mask_buf.get();
+                    _DifferenceType __m{};
+                    __par_backend::__parallel_strict_scan(
+                        __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, _DifferenceType(0),
+                        [=](_DifferenceType __i, _DifferenceType __len) -> _DifferenceType { // Reduce
+                            _DifferenceType __extra = 0;
+                            if (__i == 0)
+                            {
+                                // Special boundary case
+                                __mask[__i] = true; // element 0 has no predecessor to compare with, so it is flagged directly
+                                if (--__len == 0) // the chunk held only element 0
+                                    return 1;
+                                ++__i;
+                                ++__extra; // accounts for element 0 in the returned count
+                            }
+                            return __internal::__brick_calc_mask_2<_DifferenceType>(
+                                       __first + __i, __first + (__i + __len), __mask + __i, __pred, _IsVector{}) +
+                                   __extra;
+                        },
+                        std::plus<_DifferenceType>(),                                                // Combine
+                        [=](_DifferenceType __i, _DifferenceType __len, _DifferenceType __initial) { // Scan
+                            // Phase 2 is same as for __pattern_copy_if
+                            __internal::__brick_copy_by_mask(
+                                __first + __i, __first + (__i + __len), __result + __initial, __mask + __i,
+                                [](_RandomAccessIterator1 __x, _RandomAccessIterator2 __z) { *__z = *__x; },
+                                _IsVector{});
+                        },
+                        [&__m](_DifferenceType __total) { __m = __total; }); // stores the total reported by the scan
+                    return __result + __m;
+                });
+        }
+    }
+    // trivial sequence - use serial algorithm
+    return __internal::__brick_unique_copy(__first, __last, __result, __pred, _IsVector{});
+}
+
+//------------------------------------------------------------------------
+// reverse
+//------------------------------------------------------------------------
+template <class _BidirectionalIterator> // Serial overload of the in-place reverse brick.
+void
+__brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, /*__is_vector=*/std::false_type) noexcept
+{
+    std::reverse(__first, __last); // plain forwarding
+}
+
+template <class _RandomAccessIterator> // Vector-tagged overload of the in-place reverse brick.
+void
+__brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, /*__is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType;
+
+    const auto __n = (__last - __first) / 2; // only half the range is walked; each step swaps a pair
+    __unseq_backend::__simd_walk_2(__first, __n, std::reverse_iterator<_RandomAccessIterator>(__last), // second sequence runs backwards from __last
+                                   [](_ReferenceType __x, _ReferenceType __y) {
+                                       using std::swap; // lets an ADL-found swap be chosen, with std::swap as fallback
+                                       swap(__x, __y);
+                                   });
+}
+
+// This brick is called in the parallel version: it swaps each element of [__first, __last) with the one reached by stepping back from __d_last.
+template <class _BidirectionalIterator>
+void
+__brick_reverse(_BidirectionalIterator __first, _BidirectionalIterator __last, _BidirectionalIterator __d_last,
+                /*is_vector=*/std::false_type) noexcept
+{
+    for (--__d_last; __first != __last; ++__first, --__d_last) // __d_last is one-past-the-end, so step back before the first swap
+    {
+        using std::iter_swap; // lets an ADL-found iter_swap be chosen, with std::iter_swap as fallback
+        iter_swap(__first, __d_last);
+    }
+}
+
+// This brick is called in the parallel version, so we can use iterator arithmetic (vector-tagged overload).
+template <class _RandomAccessIterator>
+void
+__brick_reverse(_RandomAccessIterator __first, _RandomAccessIterator __last, _RandomAccessIterator __d_last,
+                /*is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::reference _ReferenceType;
+
+    __unseq_backend::__simd_walk_2(__first, __last - __first, std::reverse_iterator<_RandomAccessIterator>(__d_last), // second sequence runs backwards from __d_last
+                                   [](_ReferenceType __x, _ReferenceType __y) {
+                                       using std::swap; // lets an ADL-found swap be chosen, with std::swap as fallback
+                                       swap(__x, __y);
+                                   });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator> // Generic-tag reverse pattern.
+void
+__pattern_reverse(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last) noexcept
+{
+    __internal::__brick_reverse(__first, __last, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator> // Parallel-tag reverse pattern.
+void
+__pattern_reverse(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                  _RandomAccessIterator __last)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    __par_backend::__parallel_for(
+        __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __first + (__last - __first) / 2, // iterate over the first half only
+        [__first, __last](_RandomAccessIterator __inner_first, _RandomAccessIterator __inner_last)
+        { __internal::__brick_reverse(__inner_first, __inner_last, __last - (__inner_first - __first), _IsVector{}); }); // third argument mirrors the chunk start from the back
+}
+
+//------------------------------------------------------------------------
+// reverse_copy
+//------------------------------------------------------------------------
+
+template <class _BidirectionalIterator, class _OutputIterator> // Serial overload of the reverse_copy brick.
+_OutputIterator
+__brick_reverse_copy(_BidirectionalIterator __first, _BidirectionalIterator __last, _OutputIterator __d_first,
+                     /*is_vector=*/std::false_type) noexcept
+{
+    return std::reverse_copy(__first, __last, __d_first); // plain forwarding; returns std::reverse_copy's result
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2> // Vector-tagged overload of the reverse_copy brick.
+_RandomAccessIterator2
+__brick_reverse_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first,
+                     /*is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2;
+
+    return __unseq_backend::__simd_walk_2(std::reverse_iterator<_RandomAccessIterator1>(__last), __last - __first, // source is read backwards from __last
+                                          __d_first, [](_ReferenceType1 __x, _ReferenceType2 __y) { __y = __x; }); // element-wise copy-assignment
+}
+
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator, class _OutputIterator> // Generic-tag reverse_copy pattern.
+_OutputIterator
+__pattern_reverse_copy(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last,
+                       _OutputIterator __d_first) noexcept
+{
+    return __internal::__brick_reverse_copy(__first, __last, __d_first, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2> // Parallel-tag pattern.
+_RandomAccessIterator2
+__pattern_reverse_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                       _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    auto __len = __last - __first;
+    __par_backend::__parallel_for(
+        __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+        [__first, __len, __d_first](_RandomAccessIterator1 __inner_first, _RandomAccessIterator1 __inner_last)
+        {
+            __internal::__brick_reverse_copy(__inner_first, __inner_last,
+                                             __d_first + (__len - (__inner_last - __first)), _IsVector{}); // output offset = distance from the chunk end to __last
+        });
+    return __d_first + __len; // end of the written output range
+}
+
+//------------------------------------------------------------------------
+// rotate
+//------------------------------------------------------------------------
+template <class _ForwardIterator> // Serial overload of the rotate brick.
+_ForwardIterator
+__brick_rotate(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
+               /*is_vector=*/std::false_type) noexcept
+{
+#if defined(_PSTL_CPP11_STD_ROTATE_BROKEN) // this path ignores std::rotate's return value and computes the result itself
+    std::rotate(__first, __middle, __last);
+    return std::next(__first, std::distance(__middle, __last)); // __first advanced by the length of [__middle, __last)
+#else
+    return std::rotate(__first, __middle, __last);
+#endif
+}
+
+template <class _RandomAccessIterator> // Vector-tagged overload of the rotate brick, built from repeated block swaps.
+_RandomAccessIterator
+__brick_rotate(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last,
+               /*is_vector=*/std::true_type) noexcept
+{
+    auto __n = __last - __first;
+    auto __m = __middle - __first;
+    const _RandomAccessIterator __ret = __first + (__last - __middle); // computed up front because __first/__last are modified below
+
+    bool __is_left = (__m <= __n / 2); // true when [__first, __middle) is not longer than half the range
+    if (!__is_left)
+        __m = __n - __m; // __m now holds the length of [__middle, __last)
+
+    while (__n > 1 && __m > 0)
+    {
+        using std::iter_swap;
+        const auto __m_2 = __m * 2;
+        if (__is_left)
+        {
+            for (; __last - __first >= __m_2; __first += __m) // advance from the front while two blocks of __m still fit
+            {
+                __unseq_backend::__simd_assign(__first, __m, __first + __m, // presumably applies iter_swap pairwise to the two adjacent blocks
+                                               iter_swap<_RandomAccessIterator, _RandomAccessIterator>);
+            }
+        }
+        else
+        {
+            for (; __last - __first >= __m_2; __last -= __m) // same, but shrinking the range from the back
+            {
+                __unseq_backend::__simd_assign(__last - __m, __m, __last - __m_2,
+                                               iter_swap<_RandomAccessIterator, _RandomAccessIterator>);
+            }
+        }
+        __is_left = !__is_left; // the next pass works from the opposite end
+        __m = __n % __m; // uses the range length from before this pass; __n is refreshed on the next line
+        __n = __last - __first;
+    }
+
+    return __ret;
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator> // Generic-tag rotate pattern.
+_ForwardIterator
+__pattern_rotate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle,
+                 _ForwardIterator __last) noexcept
+{
+    return __internal::__brick_rotate(__first, __middle, __last, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator> // Parallel-tag rotate pattern using a scratch buffer.
+_RandomAccessIterator
+__pattern_rotate(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                 _RandomAccessIterator __middle, _RandomAccessIterator __last)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _Tp;
+    auto __n = __last - __first;
+    auto __m = __middle - __first;
+    if (__m <= __n / 2) // [__first, __middle) is at most half of the range
+    {
+        __par_backend::__buffer<_Tp> __buf(__n - __m); // sized for [__middle, __last)
+        return __internal::__except_handler(
+            [&__exec, __n, __m, __first, __middle, __last, &__buf]()
+            {
+                _Tp* __result = __buf.get();
+                __par_backend::__parallel_for( // phase 1: [__middle, __last) goes into the buffer
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __middle, __last,
+                    [__middle, __result](_RandomAccessIterator __b, _RandomAccessIterator __e)
+                    { __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __middle), _IsVector{}); });
+
+                __par_backend::__parallel_for( // phase 2: [__first, __middle) is shifted by (__last - __middle) positions
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __middle,
+                    [__last, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e)
+                    { __internal::__brick_move(__b, __e, __b + (__last - __middle), _IsVector{}); });
+
+                __par_backend::__parallel_for( // phase 3: buffer contents go to the front of the range
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __result, __result + (__n - __m),
+                    [__first, __result](_Tp* __b, _Tp* __e)
+                    { __brick_move_destroy()(__b, __e, __first + (__b - __result), _IsVector{}); });
+
+                return __first + (__last - __middle);
+            });
+    }
+    else
+    {
+        __par_backend::__buffer<_Tp> __buf(__m); // sized for [__first, __middle)
+        return __internal::__except_handler(
+            [&__exec, __n, __m, __first, __middle, __last, &__buf]()
+            {
+                _Tp* __result = __buf.get();
+                __par_backend::__parallel_for( // phase 1: [__first, __middle) goes into the buffer
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __middle,
+                    [__first, __result](_RandomAccessIterator __b, _RandomAccessIterator __e)
+                    { __internal::__brick_uninitialized_move(__b, __e, __result + (__b - __first), _IsVector{}); });
+
+                __par_backend::__parallel_for( // phase 2: [__middle, __last) is moved to start at __first
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __middle, __last,
+                    [__first, __middle](_RandomAccessIterator __b, _RandomAccessIterator __e)
+                    { __internal::__brick_move(__b, __e, __first + (__b - __middle), _IsVector{}); });
+
+                __par_backend::__parallel_for( // phase 3: buffer contents go to offset (__n - __m) from __first
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __result, __result + __m,
+                    [__n, __m, __first, __result](_Tp* __b, _Tp* __e)
+                    { __brick_move_destroy()(__b, __e, __first + ((__n - __m) + (__b - __result)), _IsVector{}); });
+
+                return __first + (__last - __middle);
+            });
+    }
+}
+
+//------------------------------------------------------------------------
+// rotate_copy
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator> // Serial overload of the rotate_copy brick.
+_OutputIterator
+__brick_rotate_copy(_ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last,
+                    _OutputIterator __result, /*__is_vector=*/std::false_type) noexcept
+{
+    return std::rotate_copy(__first, __middle, __last, __result); // plain forwarding; returns std::rotate_copy's result
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2> // Vector-tagged rotate_copy brick: two consecutive copies.
+_RandomAccessIterator2
+__brick_rotate_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last,
+                    _RandomAccessIterator2 __result, /*__is_vector=*/std::true_type) noexcept
+{
+    _RandomAccessIterator2 __res = __internal::__brick_copy(__middle, __last, __result, std::true_type()); // [__middle, __last) first
+    return __internal::__brick_copy(__first, __middle, __res, std::true_type()); // then [__first, __middle), starting at the first copy's result
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator> // Generic-tag rotate_copy pattern.
+_OutputIterator
+__pattern_rotate_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __middle,
+                      _ForwardIterator __last, _OutputIterator __result) noexcept
+{
+    return __internal::__brick_rotate_copy(__first, __middle, __last, __result, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2> // Parallel-tag pattern.
+_RandomAccessIterator2
+__pattern_rotate_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                      _RandomAccessIterator1 __middle, _RandomAccessIterator1 __last, _RandomAccessIterator2 __result)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    __par_backend::__parallel_for(
+        __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+        [__first, __last, __middle, __result](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e)
+        {
+            if (__b > __middle) // chunk starts after __middle
+            {
+                __internal::__brick_copy(__b, __e, __result + (__b - __middle), _IsVector{}); // output offset = distance from __middle
+            }
+            else
+            {
+                _RandomAccessIterator2 __new_result = __result + ((__last - __middle) + (__b - __first)); // output position for elements before __middle
+                if (__e < __middle) // chunk ends before __middle
+                {
+                    __internal::__brick_copy(__b, __e, __new_result, _IsVector{});
+                }
+                else // chunk reaches or crosses __middle: the copy is split at __middle
+                {
+                    __internal::__brick_copy(__b, __middle, __new_result, _IsVector{});
+                    __internal::__brick_copy(__middle, __e, __result, _IsVector{}); // elements from __middle on go to the start of the output
+                }
+            }
+        });
+    return __result + (__last - __first); // end of the written output range
+}
+
+//------------------------------------------------------------------------
+// is_partitioned
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _UnaryPredicate> // Serial overload of the is_partitioned brick.
+bool
+__brick_is_partitioned(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
+                       /*is_vector=*/std::false_type) noexcept
+{
+    return std::is_partitioned(__first, __last, __pred); // plain forwarding
+}
+
+template <class _RandomAccessIterator, class _UnaryPredicate> // Vector-tagged overload of the is_partitioned brick.
+bool
+__brick_is_partitioned(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred,
+                       /*is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType;
+    if (__first == __last) // an empty range is reported as partitioned
+    {
+        return true;
+    }
+    else
+    {
+        _RandomAccessIterator __result = __unseq_backend::__simd_first(
+            __first, _SizeType(0), __last - __first,
+            [&__pred](_RandomAccessIterator __it, _SizeType __i) { return !__pred(__it[__i]); }); // searches for an element that fails __pred
+        if (__result == __last) // no such element was found
+        {
+            return true;
+        }
+        else
+        {
+            ++__result; // skip the element that failed __pred
+            return !__unseq_backend::__simd_or(__result, __last - __result, __pred); // presumably true if __pred holds for any element of the tail
+        }
+    }
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate> // Generic-tag is_partitioned pattern.
+bool
+__pattern_is_partitioned(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                         _UnaryPredicate __pred) noexcept
+{
+    return __internal::__brick_is_partitioned(__first, __last, __pred, typename _Tag::__is_vector{}); // tag selects the brick overload
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+bool
+__pattern_is_partitioned(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                         _RandomAccessIterator __last, _UnaryPredicate __pred)
+{
+    if (__first == __last)
+    {
+        return true;
+    }
+    else
+    {
+        using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+        return __internal::__except_handler([&]() {
+            // State of current range:
+            // broken     - current range is not partitioned by pred
+            // all_true   - all elements in current range satisfy pred
+            // all_false  - all elements in current range don't satisfy pred
+            // true_false - elements satisfy pred are placed before elements that don't satisfy pred
+            enum _ReduceType
+            {
+                __not_init = -1,
+                __broken,
+                __all_true,
+                __all_false,
+                __true_false
+            };
+            _ReduceType __init = __not_init;
+
+            // Array with states that we'll have when state from the left branch is merged with state from the right branch.
+            // State is calculated by formula: new_state = table[left_state * 4 + right_state]
+            _ReduceType __table[] = {__broken,     __broken,     __broken,     __broken, __broken,    __all_true,
+                                     __true_false, __true_false, __broken,     __broken, __all_false, __broken,
+                                     __broken,     __broken,     __true_false, __broken};
+
+            __init = __par_backend::__parallel_reduce(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __init,
+                [&__pred, &__table](_RandomAccessIterator __i, _RandomAccessIterator __j,
+                                    _ReduceType __value) -> _ReduceType
+                {
+                    if (__value == __broken)
+                    {
+                        return __broken;
+                    }
+                    _ReduceType __res = __not_init;
+                    // if first element satisfy pred
+                    if (__pred(*__i))
+                    {
+                        // find first element that don't satisfy pred
+                        _RandomAccessIterator __x =
+                            __internal::__brick_find_if(__i + 1, __j, std::not_fn(__pred), _IsVector{});
+                        if (__x != __j)
+                        {
+                            // find first element after "x" that satisfy pred
+                            _RandomAccessIterator __y = __internal::__brick_find_if(__x + 1, __j, __pred, _IsVector{});
+                            // if it was found then range isn't partitioned by pred
+                            if (__y != __j)
+                            {
+                                return __broken;
+                            }
+                            else
+                            {
+                                __res = __true_false;
+                            }
+                        }
+                        else
+                        {
+                            __res = __all_true;
+                        }
+                    }
+                    else
+                    { // if first element doesn't satisfy pred
+                        // then we should find the first element that satisfy pred.
+                        // If we found it then range isn't partitioned by pred
+                        if (__internal::__brick_find_if(__i + 1, __j, __pred, _IsVector{}) != __j)
+                        {
+                            return __broken;
+                        }
+                        else
+                        {
+                            __res = __all_false;
+                        }
+                    }
+                    // if we have value from left range then we should calculate the result
+                    return (__value == -1) ? __res : __table[__value * 4 + __res];
+                },
+
+                [&__table](_ReduceType __val1, _ReduceType __val2) -> _ReduceType
+                {
+                    if (__val1 == __broken || __val2 == __broken)
+                    {
+                        return __broken;
+                    }
+                    // calculate the result for new big range
+                    return __table[__val1 * 4 + __val2];
+                });
+            return __init != __broken;
+        });
+    }
+}
+
+//------------------------------------------------------------------------
+// partition
+//------------------------------------------------------------------------
+
+// Serial brick for partition: defers directly to std::partition and returns the partition point.
+template <class _ForwardIterator, class _UnaryPredicate>
+_ForwardIterator
+__brick_partition(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
+                  /*is_vector=*/std::false_type) noexcept
+{
+    _ForwardIterator __partition_point = std::partition(__first, __last, __pred);
+    return __partition_point;
+}
+
+// Vector brick for partition. There is no vectorized implementation: a compile-time message is emitted via
+// _PSTL_PRAGMA_MESSAGE and the work is redirected to std::partition.
+template <class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__brick_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred,
+                  /*is_vector=*/std::true_type) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    _RandomAccessIterator __partition_point = std::partition(__first, __last, __pred);
+    return __partition_point;
+}
+
+// Non-parallel overload of the partition pattern. The tag object and the execution policy are not used at
+// run time; the tag's __is_vector member type selects which brick overload is called.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+_ForwardIterator
+__pattern_partition(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                    _UnaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_partition(__first, __last, __pred, __vector_tag{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__pattern_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                    _RandomAccessIterator __last, _UnaryPredicate __pred)
+{ // Parallel partition: sub-ranges are partitioned by the brick, neighbouring results are merged by swapping.
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    // partitioned range: elements before pivot satisfy pred (true part),
+    //                    elements after pivot don't satisfy pred (false part)
+    struct _PartitionRange
+    {
+        _RandomAccessIterator __begin;
+        _RandomAccessIterator __pivot;
+        _RandomAccessIterator __end;
+    };
+
+    return __internal::__except_handler([&]() {
+        _PartitionRange __init{__last, __last, __last}; // empty range; as left operand the reductor returns the right range as is
+
+        // lambda for merging two partitioned ranges to one partitioned range
+        auto __reductor = [&__exec](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange
+        {
+            auto __size1 = __val1.__end - __val1.__pivot; // length of the false part of the left range
+            auto __size2 = __val2.__pivot - __val2.__begin; // length of the true part of the right range
+            auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); // assumes the left range ends where the right one begins - TODO confirm
+
+            // if all elements in left range satisfy pred then we can move new pivot to pivot of right range
+            if (__val1.__end == __val1.__pivot)
+            {
+                return {__new_begin, __val2.__pivot, __val2.__end};
+            }
+            // if true part of right range greater than false part of left range
+            // then we should swap the false part of left range and last part of true part of right range
+            else if (__size2 > __size1)
+            {
+                __par_backend::__parallel_for(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size1,
+                    [__val1, __val2, __size1](_RandomAccessIterator __i, _RandomAccessIterator __j) {
+                        __internal::__brick_swap_ranges(__i, __j, (__val2.__pivot - __size1) + (__i - __val1.__pivot),
+                                                        _IsVector{}); // destination keeps the same offset as __i has from the left pivot
+                    });
+                return {__new_begin, __val2.__pivot - __size1, __val2.__end}; // new pivot: right pivot moved back by the swapped length
+            }
+            // else we should swap the first part of false part of left range and true part of right range
+            else
+            {
+                __par_backend::__parallel_for(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __val1.__pivot, __val1.__pivot + __size2,
+                    [__val1, __val2](_RandomAccessIterator __i, _RandomAccessIterator __j) {
+                        __internal::__brick_swap_ranges(__i, __j, __val2.__begin + (__i - __val1.__pivot), _IsVector{});
+                    });
+                return {__new_begin, __val1.__pivot + __size2, __val2.__end}; // new pivot: left pivot advanced by the swapped length
+            }
+        };
+
+        _PartitionRange __result = __par_backend::__parallel_reduce(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __init,
+            [__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j,
+                                 _PartitionRange __value) -> _PartitionRange
+            {
+                //1. serial partition
+                _RandomAccessIterator __pivot = __internal::__brick_partition(__i, __j, __pred, _IsVector{});
+
+                // 2. merging of two ranges (left and right respectively)
+                return __reductor(__value, {__i, __pivot, __j});
+            },
+            __reductor);
+        return __result.__pivot; // pivot of the fully merged range is the partition point
+    });
+}
+
+//------------------------------------------------------------------------
+// stable_partition
+//------------------------------------------------------------------------
+
+// Serial brick for stable_partition: defers directly to std::stable_partition.
+template <class _BidirectionalIterator, class _UnaryPredicate>
+_BidirectionalIterator
+__brick_stable_partition(_BidirectionalIterator __first, _BidirectionalIterator __last, _UnaryPredicate __pred,
+                         /*__is_vector=*/std::false_type) noexcept
+{
+    _BidirectionalIterator __partition_point = std::stable_partition(__first, __last, __pred);
+    return __partition_point;
+}
+
+// Vector brick for stable_partition. There is no vectorized implementation: a compile-time message is
+// emitted via _PSTL_PRAGMA_MESSAGE and the work is redirected to std::stable_partition.
+template <class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__brick_stable_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred,
+                         /*__is_vector=*/std::true_type) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    _RandomAccessIterator __partition_point = std::stable_partition(__first, __last, __pred);
+    return __partition_point;
+}
+
+// Non-parallel overload of the stable_partition pattern. The tag object and the execution policy are not
+// used at run time; the tag's __is_vector member type selects which brick overload is called.
+template <class _Tag, class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate>
+_BidirectionalIterator
+__pattern_stable_partition(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __last,
+                           _UnaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_stable_partition(__first, __last, __pred, __vector_tag{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__pattern_stable_partition(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                           _RandomAccessIterator __last, _UnaryPredicate __pred) noexcept
+{ // Parallel stable_partition: sub-ranges are stably partitioned by the brick, neighbours are merged by rotation.
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    // partitioned range: elements before pivot satisfy pred (true part),
+    //                    elements after pivot don't satisfy pred (false part)
+    struct _PartitionRange
+    {
+        _RandomAccessIterator __begin;
+        _RandomAccessIterator __pivot;
+        _RandomAccessIterator __end;
+    };
+
+    return __internal::__except_handler([&]() {
+        _PartitionRange __init{__last, __last, __last}; // empty range; as left operand the reductor returns the right range as is
+
+        // lambda for merging two partitioned ranges to one partitioned range
+        auto __reductor = [](_PartitionRange __val1, _PartitionRange __val2) -> _PartitionRange
+        {
+            auto __size1 = __val1.__end - __val1.__pivot; // length of the false part of the left range
+            auto __new_begin = __val2.__begin - (__val1.__end - __val1.__begin); // assumes the left range ends where the right one begins - TODO confirm
+
+            // if all elements in left range satisfy pred then we can move new pivot to pivot of right range
+            if (__val1.__end == __val1.__pivot)
+            {
+                return {__new_begin, __val2.__pivot, __val2.__end};
+            }
+            // otherwise rotate [left pivot, right pivot) around the begin of the right range; assuming __brick_rotate
+            // behaves like std::rotate, the true part of the right range ends up in front of the false part of the left range
+            else
+            {
+                __internal::__brick_rotate(__val1.__pivot, __val2.__begin, __val2.__pivot, _IsVector{});
+                return {__new_begin, __val2.__pivot - __size1, __val2.__end}; // new pivot: right pivot moved back by the left false part's length
+            }
+        };
+
+        _PartitionRange __result = __par_backend::__parallel_reduce(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __init,
+            [&__pred, __reductor](_RandomAccessIterator __i, _RandomAccessIterator __j,
+                                  _PartitionRange __value) -> _PartitionRange
+            {
+                //1. serial stable_partition
+                _RandomAccessIterator __pivot = __internal::__brick_stable_partition(__i, __j, __pred, _IsVector{});
+
+                // 2. merging of two ranges (left and right respectively)
+                return __reductor(__value, {__i, __pivot, __j});
+            },
+            __reductor);
+        return __result.__pivot; // pivot of the fully merged range is the partition point
+    });
+}
+
+//------------------------------------------------------------------------
+// partition_copy
+//------------------------------------------------------------------------
+
+// Serial brick for partition_copy: defers directly to std::partition_copy and returns the pair of output
+// end iterators it produces.
+template <class _ForwardIterator, class _OutputIterator1, class _OutputIterator2, class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2>
+__brick_partition_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator1 __out_true,
+                       _OutputIterator2 __out_false, _UnaryPredicate __pred, /*is_vector=*/std::false_type) noexcept
+{
+    std::pair<_OutputIterator1, _OutputIterator2> __out_ends =
+        std::partition_copy(__first, __last, __out_true, __out_false, __pred);
+    return __out_ends;
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3,
+          class _UnaryPredicate>
+std::pair<_RandomAccessIterator2, _RandomAccessIterator3>
+__brick_partition_copy(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true,
+                       _RandomAccessIterator3 __out_false, _UnaryPredicate __pred,
+                       /*is_vector=*/std::true_type) noexcept
+{ // Vector brick for partition_copy; which implementation is compiled depends on _PSTL_MONOTONIC_PRESENT.
+#if defined(_PSTL_MONOTONIC_PRESENT) // SIMD path: takes the element count (__last - __first) instead of an end iterator
+    return __unseq_backend::__simd_partition_copy(__first, __last - __first, __out_true, __out_false, __pred);
+#else // macro not defined: fall back to the standard serial algorithm
+    return std::partition_copy(__first, __last, __out_true, __out_false, __pred);
+#endif
+}
+
+// Non-parallel overload of the partition_copy pattern. The tag object and the execution policy are not
+// used at run time; the tag's __is_vector member type selects which brick overload is called.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator1, class _OutputIterator2,
+          class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2>
+__pattern_partition_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                         _OutputIterator1 __out_true, _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, __vector_tag{});
+}
+
+// Parallel overload of the partition_copy pattern.
+//
+// Ranges with more than one element are processed with __parallel_strict_scan over a temporary bool mask
+// of length n:
+//   - reduce:  __brick_calc_mask_1 is given each chunk together with its slice of the mask and returns a
+//              pair of _DifferenceType values;
+//   - combine: two such pairs are added component-wise;
+//   - scan:    __brick_partition_by_mask is given the chunk, its mask slice, and both output iterators
+//              advanced by the pair accumulated for the preceding chunks;
+//   - apex:    the grand total is stored and used to compute the returned output end iterators.
+// Ranges of zero or one element are forwarded to the brick directly.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _UnaryPredicate>
+std::pair<_RandomAccessIterator2, _RandomAccessIterator3>
+__pattern_partition_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                         _RandomAccessIterator1 __last, _RandomAccessIterator2 __out_true,
+                         _RandomAccessIterator3 __out_false, _UnaryPredicate __pred)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
+    typedef std::pair<_DifferenceType, _DifferenceType> _ReturnType;
+    const _DifferenceType __n = __last - __first;
+    if (_DifferenceType(1) < __n)
+    {
+        // One mask flag per input element, shared by the reduce and scan phases.
+        __par_backend::__buffer<bool> __mask_buf(__n);
+        return __internal::__except_handler(
+            [&__exec, __n, __first, __out_true, __out_false, __pred, &__mask_buf]()
+            {
+                bool* __mask = __mask_buf.get();
+                _ReturnType __m{};
+                __par_backend::__parallel_strict_scan(
+                    __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n,
+                    std::make_pair(_DifferenceType(0), _DifferenceType(0)),
+                    [=](_DifferenceType __i, _DifferenceType __len) { // Reduce
+                        return __internal::__brick_calc_mask_1<_DifferenceType>(__first + __i, __first + (__i + __len),
+                                                                                __mask + __i, __pred, _IsVector{});
+                    },
+                    [](const _ReturnType& __x, const _ReturnType& __y) -> _ReturnType
+                    { return std::make_pair(__x.first + __y.first, __x.second + __y.second); }, // Combine
+                    [=](_DifferenceType __i, _DifferenceType __len, _ReturnType __initial) {    // Scan
+                        __internal::__brick_partition_by_mask(
+                            __first + __i, __first + (__i + __len), __out_true + __initial.first,
+                            __out_false + __initial.second, __mask + __i, _IsVector{});
+                    },
+                    [&__m](_ReturnType __total) { __m = __total; });
+                return std::make_pair(__out_true + __m.first, __out_false + __m.second);
+            });
+    }
+    // trivial sequence - use serial algorithm
+    return __internal::__brick_partition_copy(__first, __last, __out_true, __out_false, __pred, _IsVector{});
+}
+
+//------------------------------------------------------------------------
+// sort
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare, class _IsMoveConstructible>
+void
+__pattern_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+               _IsMoveConstructible) noexcept
+{ // Non-parallel overload: the tag, policy and move-constructibility arguments are unnamed and unused.
+    std::sort(__first, __last, __comp); // plain standard sort over the whole range
+}
+
+// Parallel overload of the sort pattern, selected when the last argument is std::true_type
+// (is_move_constructible). The range is handed to the backend's __parallel_stable_sort with a callable
+// that runs std::sort; the whole call is wrapped in __except_handler.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_sort(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+               _RandomAccessIterator __last, _Compare __comp, /*is_move_constructible=*/std::true_type)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_stable_sort(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp,
+                // leaf sorter supplied to the backend
+                [](_RandomAccessIterator __leaf_first, _RandomAccessIterator __leaf_last, _Compare __leaf_comp)
+                { std::sort(__leaf_first, __leaf_last, __leaf_comp); });
+        });
+}
+
+//------------------------------------------------------------------------
+// stable_sort
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_stable_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last,
+                      _Compare __comp) noexcept
+{ // Non-parallel overload: the tag and policy arguments are unnamed and unused.
+    std::stable_sort(__first, __last, __comp); // plain standard stable sort over the whole range
+}
+
+// Parallel overload of the stable_sort pattern. The range is handed to the backend's
+// __parallel_stable_sort with a callable that runs std::stable_sort; the whole call is wrapped in
+// __except_handler.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_stable_sort(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                      _RandomAccessIterator __last, _Compare __comp)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_stable_sort(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp,
+                // leaf sorter supplied to the backend
+                [](_RandomAccessIterator __leaf_first, _RandomAccessIterator __leaf_last, _Compare __leaf_comp)
+                { std::stable_sort(__leaf_first, __leaf_last, __leaf_comp); });
+        });
+}
+
+//------------------------------------------------------------------------
+// partial_sort
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_partial_sort(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+                       _RandomAccessIterator __last, _Compare __comp) noexcept
+{ // Non-parallel overload: the tag and policy arguments are unnamed and unused.
+    std::partial_sort(__first, __middle, __last, __comp); // plain standard partial sort
+}
+
+// Parallel overload of the partial_sort pattern. Returns immediately when [__first, __middle) is empty.
+// Otherwise [__first, __last) is handed to the backend's __parallel_stable_sort together with the number
+// of elements requested (__middle - __first) and a leaf callable that fully sorts a chunk when it holds
+// at most that many elements and partially sorts it otherwise.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_partial_sort(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                       _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    const auto __n = __middle - __first;
+    if (__n == 0)
+    {
+        return;
+    }
+
+    __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_stable_sort(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp,
+                [__n](_RandomAccessIterator __leaf_first, _RandomAccessIterator __leaf_last, _Compare __leaf_comp)
+                {
+                    if (__leaf_last - __leaf_first <= __n)
+                    {
+                        // the chunk is no longer than the requested prefix: sort all of it
+                        std::sort(__leaf_first, __leaf_last, __leaf_comp);
+                    }
+                    else
+                    {
+                        std::partial_sort(__leaf_first, __leaf_first + __n, __leaf_last, __leaf_comp);
+                    }
+                },
+                __n);
+        });
+}
+
+//------------------------------------------------------------------------
+// partial_sort_copy
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__pattern_partial_sort_copy(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                            _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp) noexcept
+{ // Non-parallel overload: the tag and policy arguments are unnamed and unused.
+    return std::partial_sort_copy(__first, __last, __d_first, __d_last, __comp); // result of the standard algorithm is returned as is
+}
+
+// Parallel overload of the partial_sort_copy pattern.
+//
+// Returns __d_first at once when either the input or the output range is empty. Otherwise, with
+// n1 = input length and n2 = output length:
+//   - n2 >= n1: every leaf copies its slice of the input into the matching slice of the output and
+//     sorts it there through __parallel_stable_sort; the result is __d_first + n1.
+//   - n2 <  n1: the input is copy-constructed into a temporary __buffer<_T1>, sorted there, the first
+//     n2 elements are moved to the output (and destroyed) by __brick_move_destroy, the remaining
+//     n1 - n2 elements are destroyed by __brick_destroy, and the result is __d_first + n2.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+_RandomAccessIterator2
+__pattern_partial_sort_copy(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first,
+                            _RandomAccessIterator1 __last, _RandomAccessIterator2 __d_first,
+                            _RandomAccessIterator2 __d_last, _Compare __comp)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (__last == __first || __d_last == __d_first)
+    {
+        return __d_first;
+    }
+    auto __n1 = __last - __first;
+    auto __n2 = __d_last - __d_first;
+    return __internal::__except_handler([&]() {
+        if (__n2 >= __n1)
+        {
+            __par_backend::__parallel_stable_sort(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __d_first, __d_first + __n1, __comp,
+                [__first, __d_first](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j, _Compare __comp)
+                {
+                    // input slice located at the same offsets as the output slice [__i, __j)
+                    _RandomAccessIterator1 __i1 = __first + (__i - __d_first);
+                    _RandomAccessIterator1 __j1 = __first + (__j - __d_first);
+
+                // 1. Copy elements from input to output
+#if !defined(_PSTL_ICC_18_OMP_SIMD_BROKEN)
+                    __internal::__brick_copy(__i1, __j1, __i, _IsVector{});
+#else
+                    std::copy(__i1, __j1, __i);
+#endif
+                    // 2. Sort elements in output sequence
+                    std::sort(__i, __j, __comp);
+                },
+                __n1);
+            return __d_first + __n1;
+        }
+        else
+        {
+            typedef typename std::iterator_traits<_RandomAccessIterator1>::value_type _T1;
+            __par_backend::__buffer<_T1> __buf(__n1);
+            _T1* __r = __buf.get();
+
+            __par_backend::__parallel_stable_sort(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r, __r + __n1, __comp,
+                [__n2, __first, __r](_T1* __i, _T1* __j, _Compare __comp)
+                {
+                    _RandomAccessIterator1 __it = __first + (__i - __r);
+
+                    // 1. Copy elements from input to raw memory.
+                    // The storage is sized for, sorted as, moved from and destroyed as _T1, so the objects
+                    // created here must be _T1 as well; conversion to the output value type happens when
+                    // the elements are moved out in step 3.
+                    for (_T1* __k = __i; __k != __j; ++__k, ++__it)
+                    {
+                        ::new (__k) _T1(*__it);
+                    }
+
+                    // 2. Sort elements in temporary __buffer
+                    if (__n2 < __j - __i)
+                        std::partial_sort(__i, __i + __n2, __j, __comp);
+                    else
+                        std::sort(__i, __j, __comp);
+                },
+                __n2);
+
+            // 3. Move elements from temporary __buffer to output
+            __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r, __r + __n2,
+                                          [__r, __d_first](_T1* __i, _T1* __j)
+                                          { __brick_move_destroy()(__i, __j, __d_first + (__i - __r), _IsVector{}); });
+            // 4. Destroy the elements that were not moved to the output
+            __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r + __n2,
+                                          __r + __n1,
+                                          [](_T1* __i, _T1* __j) { __brick_destroy(__i, __j, _IsVector{}); });
+
+            return __d_first + __n2;
+        }
+    });
+}
+
+//------------------------------------------------------------------------
+// adjacent_find
+//------------------------------------------------------------------------
+template <class _RandomAccessIterator, class _BinaryPredicate>
+_RandomAccessIterator
+__brick_adjacent_find(_RandomAccessIterator __first, _RandomAccessIterator __last, _BinaryPredicate __pred,
+                      /* IsVector = */ std::true_type, bool __or_semantic) noexcept
+{ // Vector brick for adjacent_find: all arguments, including __or_semantic, go to the unsequenced backend.
+    return __unseq_backend::__simd_adjacent_find(__first, __last, __pred, __or_semantic);
+}
+
+// Serial brick for adjacent_find: defers directly to std::adjacent_find. The trailing bool argument
+// (the "or semantic" flag of the vector overload) is unnamed and ignored here.
+template <class _ForwardIterator, class _BinaryPredicate>
+_ForwardIterator
+__brick_adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred,
+                      /* IsVector = */ std::false_type, bool) noexcept
+{
+    _ForwardIterator __found = std::adjacent_find(__first, __last, __pred);
+    return __found;
+}
+
+// Non-parallel overload of the adjacent_find pattern. The tag object and the execution policy are not
+// used at run time; the tag's __is_vector member type selects which brick overload is called, and
+// __or_semantic is passed through unchanged.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+_ForwardIterator
+__pattern_adjacent_find(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                        _BinaryPredicate __pred, bool __or_semantic) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    return __internal::__brick_adjacent_find(__first, __last, __pred, __vector_tag{}, __or_semantic);
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _BinaryPredicate>
+_RandomAccessIterator
+__pattern_adjacent_find(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                        _RandomAccessIterator __last, _BinaryPredicate __pred, bool __or_semantic)
+{ // Parallel adjacent_find: a reduction over sub-ranges that keeps the smallest matching iterator.
+    if (__last - __first < 2) // fewer than two elements: no adjacent pair can exist
+        return __last;
+
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __par_backend::__parallel_reduce(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last, __last, // initial value __last stands for "not found"
+                [__last, __pred, __or_semantic](_RandomAccessIterator __begin, _RandomAccessIterator __end,
+                                                _RandomAccessIterator __value) -> _RandomAccessIterator
+                {
+                    // TODO: investigate performance benefits from the use of shared variable for the result,
+                    // checking (compare_and_swap idiom) its __value at __first.
+                    if (__or_semantic && __value < __last)
+                    { //found
+                        __par_backend::__cancel_execution();
+                        return __value;
+                    }
+
+                    if (__value > __begin) // search only when no match before this sub-range has been recorded
+                    {
+                        // modify __end to check the predicate on the boundary __values;
+                        // TODO: to use a custom range with boundaries overlapping
+                        // TODO: investigate what if we remove "if" below and run algorithm on range [__first, __last-1)
+                        // then check the pair [__last-1, __last)
+                        if (__end != __last)
+                            ++__end;
+
+                        //correct the global result iterator if the "brick" returns a local "__last"
+                        const _RandomAccessIterator __res =
+                            __internal::__brick_adjacent_find(__begin, __end, __pred, _IsVector{}, __or_semantic);
+                        if (__res < __end) // a result equal to the local end means nothing was found in this sub-range
+                            __value = __res;
+                    }
+                    return __value;
+                },
+                [](_RandomAccessIterator __x, _RandomAccessIterator __y) -> _RandomAccessIterator
+                { return __x < __y ? __x : __y; } //reduce a __value
+            );
+        });
+}
+
+//------------------------------------------------------------------------
+// nth_element
+//------------------------------------------------------------------------
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_nth_element(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+                      _RandomAccessIterator __last, _Compare __comp) noexcept
+{ // Non-parallel overload: the tag and policy arguments are unnamed and unused.
+    std::nth_element(__first, __nth, __last, __comp); // plain standard nth_element
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_nth_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                      _RandomAccessIterator __nth, _RandomAccessIterator __last, _Compare __comp) noexcept
+{ // Parallel nth_element: partitions around *__first repeatedly, narrowing the range until the pivot lands on __nth.
+    if (__first == __last || __nth == __last) // empty range, or __nth is the end iterator: nothing to do
+    {
+        return;
+    }
+
+    using std::iter_swap;
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _Tp;
+    _RandomAccessIterator __x;
+    do
+    {
+        __x = __internal::__pattern_partition(__tag, std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, // *__first is the pivot; everything after it is partitioned
+                                              [&__comp, __first](const _Tp& __x) { return __comp(__x, *__first); }); // true part: elements ordered before the pivot
+        --__x; // last element of the true part, or __first itself when the true part is empty
+        if (__x != __first)
+        {
+            iter_swap(__first, __x); // put the pivot value between the true part and the false part
+        }
+        // if x > nth then our new range for partition is [first, x)
+        if (__x - __nth > 0)
+        {
+            __last = __x;
+        }
+        // if x < nth then our new range for partition is [x, last)
+        else if (__x - __nth < 0)
+        {
+            // if *x == *nth then we can start new partition with x+1
+            if (!__comp(*__nth, *__x) && !__comp(*__x, *__nth))
+            {
+                ++__x;
+            }
+            else
+            {
+                iter_swap(__nth, __x);
+            }
+            __first = __x;
+        }
+    } while (__x != __nth); // stop once the pivot position coincides with __nth
+}
+
+//------------------------------------------------------------------------
+// fill, fill_n
+//------------------------------------------------------------------------
+template <class _RandomAccessIterator, class _Tp>
+void
+__brick_fill(_RandomAccessIterator __first, _RandomAccessIterator __last, const _Tp& __value,
+             /* __is_vector = */ std::true_type) noexcept
+{ // Vector brick for fill: the range is converted to (begin, count) for the unsequenced backend.
+    __unseq_backend::__simd_fill_n(__first, __last - __first, __value); // the backend's return value is discarded
+}
+
+template <class _ForwardIterator, class _Tp>
+void
+__brick_fill(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value,
+             /* __is_vector = */ std::false_type) noexcept
+{ // Serial brick for fill.
+    std::fill(__first, __last, __value); // plain standard fill over the whole range
+}
+
+// Non-parallel overload of the fill pattern. The tag object and the execution policy are not used at
+// run time; the tag's __is_vector member type selects which brick overload is called. Returns nothing.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+void
+__pattern_fill(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) noexcept
+{
+    using __vector_tag = typename _Tag::__is_vector;
+    __internal::__brick_fill(__first, __last, __value, __vector_tag{});
+}
+
+// Parallel overload of the fill pattern. The range is split by the backend's __parallel_for and each
+// piece is filled by __brick_fill; the call is wrapped in __except_handler. Unlike the non-parallel
+// overload, this one returns __last.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Tp>
+_RandomAccessIterator
+__pattern_fill(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+               _RandomAccessIterator __last, const _Tp& __value)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&__exec, __first, __last, &__value]()
+        {
+            __par_backend::__parallel_for(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [&__value](_RandomAccessIterator __chunk_first, _RandomAccessIterator __chunk_last)
+                { __internal::__brick_fill(__chunk_first, __chunk_last, __value, _IsVector{}); });
+            return __last;
+        });
+}
+
+template <class _RandomAccessIterator, class _Size, class _Tp>
+_RandomAccessIterator
+__brick_fill_n(_RandomAccessIterator __first, _Size __count, const _Tp& __value,
+               /* __is_vector = */ std::true_type) noexcept
+{ // Vector brick for fill_n.
+    return __unseq_backend::__simd_fill_n(__first, __count, __value); // the backend's result is returned unchanged
+}
+
+// Serial brick for fill_n: defers directly to std::fill_n and returns the iterator it yields.
+template <class _OutputIterator, class _Size, class _Tp>
+_OutputIterator
+__brick_fill_n(_OutputIterator __first, _Size __count, const _Tp& __value, /* __is_vector = */ std::false_type) noexcept
+{
+    _OutputIterator __filled_end = std::fill_n(__first, __count, __value);
+    return __filled_end;
+}
+
+// Serial fill_n pattern: the execution policy argument is ignored; the __brick_fill_n overload is
+// chosen by _Tag::__is_vector and its result is returned unchanged.
+template <typename _Tag, typename _ExecutionPolicy, typename _OutputIterator, typename _Size, typename _Tp>
+_OutputIterator
+__pattern_fill_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, const _Tp& __value) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    return __internal::__brick_fill_n(__first, __count, __value, __is_vector_t{});
+}
+
+// Parallel fill_n pattern: expressed through the parallel fill pattern on the range
+// [__first, __first + __count); returns that pattern's result.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Tp>
+_RandomAccessIterator
+__pattern_fill_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                 _Size __count, const _Tp& __value)
+{
+    auto __last = __first + __count;
+    return __internal::__pattern_fill(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __value);
+}
+
+//------------------------------------------------------------------------
+// generate, generate_n
+//------------------------------------------------------------------------
+// Vectorized brick for generate (tag std::true_type): converts the range to a (first, count)
+// pair and forwards to __unseq_backend::__simd_generate_n.
+template <typename _RandomAccessIterator, typename _Generator>
+void
+__brick_generate(_RandomAccessIterator __first, _RandomAccessIterator __last, _Generator __g,
+                 std::true_type /* is_vector */) noexcept
+{
+    const auto __n = __last - __first;
+    __unseq_backend::__simd_generate_n(__first, __n, __g);
+}
+
+// Serial brick for generate (tag std::false_type): forwards to std::generate.
+template <typename _ForwardIterator, typename _Generator>
+void
+__brick_generate(_ForwardIterator __first, _ForwardIterator __last, _Generator __g,
+                 std::false_type /* is_vector */) noexcept
+{
+    std::generate(__first, __last, __g);
+}
+
+// Serial generate pattern: the execution policy argument is ignored and the work is forwarded to
+// the __brick_generate overload selected by _Tag::__is_vector.
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Generator>
+void
+__pattern_generate(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Generator __g) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    __internal::__brick_generate(__first, __last, __g, __is_vector_t{});
+}
+
+// Parallel generate pattern: hands [__first, __last) to __par_backend::__parallel_for, which runs
+// the generate brick (with its own copy of __g) on each sub-range it is given. The call is made
+// through __internal::__except_handler. Returns __last.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Generator>
+_RandomAccessIterator
+__pattern_generate(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                   _RandomAccessIterator __last, _Generator __g)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&__exec, &__first, &__last, &__g]()
+        {
+            // The generator is copied into the work item here, inside the handler.
+            auto __generate_chunk = [__g](_RandomAccessIterator __begin, _RandomAccessIterator __end)
+            { __internal::__brick_generate(__begin, __end, __g, _IsVector{}); };
+
+            __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                          std::move(__generate_chunk));
+            return __last;
+        });
+}
+
+// Vectorized brick for generate_n (tag std::true_type): forwards to
+// __unseq_backend::__simd_generate_n and returns its result.
+// The count type is spelled _Size (a reserved identifier) so that a user-defined macro named
+// `Size` cannot break this header.
+template <class _RandomAccessIterator, class _Size, class _Generator>
+_RandomAccessIterator
+__brick_generate_n(_RandomAccessIterator __first, _Size __count, _Generator __g,
+                   /* is_vector = */ std::true_type) noexcept
+{
+    return __unseq_backend::__simd_generate_n(__first, __count, __g);
+}
+
+// Serial brick for generate_n (tag std::false_type): forwards to std::generate_n and returns its
+// result.
+// Template parameters use reserved identifiers (_OutputIterator, _Size) so that user-defined
+// macros named `OutputIterator` or `Size` cannot break this header.
+template <class _OutputIterator, class _Size, class _Generator>
+_OutputIterator
+__brick_generate_n(_OutputIterator __first, _Size __count, _Generator __g, /* is_vector = */ std::false_type) noexcept
+{
+    return std::generate_n(__first, __count, __g);
+}
+
+// Serial generate_n pattern: the execution policy argument is ignored; the __brick_generate_n
+// overload is chosen by _Tag::__is_vector and its result is returned unchanged.
+template <typename _Tag, typename _ExecutionPolicy, typename _OutputIterator, typename _Size, typename _Generator>
+_OutputIterator
+__pattern_generate_n(_Tag, _ExecutionPolicy&&, _OutputIterator __first, _Size __count, _Generator __g) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    return __internal::__brick_generate_n(__first, __count, __g, __is_vector_t{});
+}
+
+// Parallel generate_n pattern: expressed through the parallel generate pattern on the range
+// [__first, __first + __count); returns that pattern's result.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Size, class _Generator>
+_RandomAccessIterator
+__pattern_generate_n(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                     _Size __count, _Generator __g)
+{
+    // Compile-time guard: this overload is only meant for random access iterators.
+    static_assert(__are_random_access_iterators<_RandomAccessIterator>::value,
+                  "Pattern-brick error. Should be a random access iterator.");
+
+    auto __last = __first + __count;
+    return __internal::__pattern_generate(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __g);
+}
+
+//------------------------------------------------------------------------
+// remove
+//------------------------------------------------------------------------
+
+// Serial brick for remove_if (tag std::false_type): forwards to std::remove_if and returns the
+// new logical end.
+template <typename _ForwardIterator, typename _UnaryPredicate>
+_ForwardIterator
+__brick_remove_if(_ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
+                  std::false_type /* __is_vector */) noexcept
+{
+    return std::remove_if(__first, __last, __pred);
+}
+
+// Vectorized brick for remove_if (tag std::true_type). The SIMD implementation is used only when
+// _PSTL_MONOTONIC_PRESENT is defined; otherwise the call falls back to std::remove_if.
+template <typename _RandomAccessIterator, typename _UnaryPredicate>
+_RandomAccessIterator
+__brick_remove_if(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryPredicate __pred,
+                  std::true_type /* __is_vector */) noexcept
+{
+#ifndef _PSTL_MONOTONIC_PRESENT
+    return std::remove_if(__first, __last, __pred);
+#else
+    return __unseq_backend::__simd_remove_if(__first, __last - __first, __pred);
+#endif
+}
+
+// Serial remove_if pattern: the execution policy argument is ignored; the __brick_remove_if
+// overload is chosen by _Tag::__is_vector and its result is returned unchanged.
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator, typename _UnaryPredicate>
+_ForwardIterator
+__pattern_remove_if(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                    _UnaryPredicate __pred) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    return __internal::__brick_remove_if(__first, __last, __pred, __is_vector_t{});
+}
+
+// Parallel remove_if pattern. Ranges with zero or one element are handled by the brick directly;
+// longer ranges are delegated to __internal::__remove_elements, which is given a callback that
+// fills a bool mask for a sub-range: mask[i] is true exactly when __pred rejects element i.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryPredicate>
+_RandomAccessIterator
+__pattern_remove_if(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                    _RandomAccessIterator __last, _UnaryPredicate __pred) noexcept
+{
+    using _ReferenceType = typename std::iterator_traits<_RandomAccessIterator>::reference;
+
+    // Trivial sequence - use serial algorithm
+    if (__first == __last || __first + 1 == __last)
+        return __internal::__brick_remove_if(__first, __last, __pred, _IsVector{});
+
+    // Per-element mask computation: store the negated predicate result.
+    auto __set_mask = [&__pred](bool& __x, _ReferenceType __y) { __x = !__pred(__y); };
+
+    return __internal::__remove_elements(
+        __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+        [&__set_mask](bool* __b, bool* __e, _RandomAccessIterator __it)
+        { __internal::__brick_walk2(__b, __e, __it, __set_mask, _IsVector{}); });
+}
+
+//------------------------------------------------------------------------
+// merge
+//------------------------------------------------------------------------
+
+// Serial brick for merge (tag std::false_type): forwards to std::merge and returns the end of
+// the written output.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare>
+_OutputIterator
+__brick_merge(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+              _ForwardIterator2 __last2, _OutputIterator __d_first, _Compare __comp,
+              std::false_type /* __is_vector */) noexcept
+{
+    return std::merge(__first1, __last1, __first2, __last2, __d_first, __comp);
+}
+
+// Vectorized brick for merge (tag std::true_type). No vectorized implementation exists yet: a
+// build-time message is emitted and the call is redirected to std::merge.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
+          typename _Compare>
+_RandomAccessIterator3
+__brick_merge(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+              _RandomAccessIterator2 __last2, _RandomAccessIterator3 __d_first, _Compare __comp,
+              std::true_type /* __is_vector */) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::merge(__first1, __last1, __first2, __last2, __d_first, __comp);
+}
+
+// Serial merge pattern: the execution policy argument is ignored; the __brick_merge overload is
+// chosen by _Tag::__is_vector and its result is returned unchanged.
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2,
+          typename _OutputIterator, typename _Compare>
+_OutputIterator
+__pattern_merge(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __d_first,
+                _Compare __comp) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    return __internal::__brick_merge(__first1, __last1, __first2, __last2, __d_first, __comp, __is_vector_t{});
+}
+
+// Parallel merge pattern: delegates to __par_backend::__parallel_merge, passing the merge brick as
+// the operation applied to the sub-ranges. The returned iterator is computed from the input
+// lengths: __d_first advanced by the sizes of both input ranges.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _Compare>
+_RandomAccessIterator3
+__pattern_merge(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                _RandomAccessIterator3 __d_first, _Compare __comp)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    // Leaf operation: merge two sub-ranges with the brick selected by _IsVector.
+    auto __merge_leaf = [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1, _RandomAccessIterator2 __f2,
+                           _RandomAccessIterator2 __l2, _RandomAccessIterator3 __f3, _Compare __cmp)
+    { return __internal::__brick_merge(__f1, __l1, __f2, __l2, __f3, __cmp, _IsVector{}); };
+
+    __par_backend::__parallel_merge(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                    __first2, __last2, __d_first, __comp, __merge_leaf);
+    return __d_first + (__last1 - __first1) + (__last2 - __first2);
+}
+
+//------------------------------------------------------------------------
+// inplace_merge
+//------------------------------------------------------------------------
+// Serial brick for inplace_merge (tag std::false_type): forwards to std::inplace_merge.
+template <typename _BidirectionalIterator, typename _Compare>
+void
+__brick_inplace_merge(_BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last,
+                      _Compare __comp, std::false_type /* __is_vector */) noexcept
+{
+    std::inplace_merge(__first, __middle, __last, __comp);
+}
+
+// Vectorized brick for inplace_merge (tag std::true_type). No vectorized implementation exists
+// yet: a build-time message is emitted and the call is redirected to std::inplace_merge.
+template <typename _RandomAccessIterator, typename _Compare>
+void
+__brick_inplace_merge(_RandomAccessIterator __first, _RandomAccessIterator __middle, _RandomAccessIterator __last,
+                      _Compare __comp, std::true_type /* __is_vector */) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial")
+    std::inplace_merge(__first, __middle, __last, __comp);
+}
+
+// Serial inplace_merge pattern: the execution policy argument is ignored and the work is
+// forwarded to the __brick_inplace_merge overload selected by _Tag::__is_vector.
+template <typename _Tag, typename _ExecutionPolicy, typename _BidirectionalIterator, typename _Compare>
+void
+__pattern_inplace_merge(_Tag, _ExecutionPolicy&&, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+                        _BidirectionalIterator __last, _Compare __comp) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    __internal::__brick_inplace_merge(__first, __middle, __last, __comp, __is_vector_t{});
+}
+
+// Parallel inplace_merge pattern. The two halves [__first, __middle) and [__middle, __last) are
+// merged by __par_backend::__parallel_merge into a temporary buffer of __last - __first elements,
+// and the buffer contents are then transferred back to [__first, __last) by a second parallel
+// pass using __brick_move_destroy. Both passes run inside __internal::__except_handler.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+void
+__pattern_inplace_merge(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                        _RandomAccessIterator __middle, _RandomAccessIterator __last, _Compare __comp)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+    using _Tp = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+
+    // Nothing to do when the whole range or either half is empty.
+    if (__first == __last || __first == __middle || __middle == __last)
+        return;
+
+    auto __n = __last - __first;
+    __par_backend::__buffer<_Tp> __buf(__n);
+    _Tp* __r = __buf.get();
+    __internal::__except_handler(
+        [&]()
+        {
+            // Moves a single element into the buffer slot __z: one callable move-assigns, the
+            // other move-constructs in place; __invoke_if_else picks between them based on
+            // std::is_trivial<_Tp> (presumably the first one when the trait is true).
+            auto __move_values = [](_RandomAccessIterator __x, _Tp* __z)
+            {
+                __internal::__invoke_if_else(
+                    std::is_trivial<_Tp>{}, [&]() { *__z = std::move(*__x); },
+                    [&]() { ::new (std::addressof(*__z)) _Tp(std::move(*__x)); });
+            };
+
+            // Moves a whole sub-range into uninitialized buffer storage.
+            auto __move_sequences = [](_RandomAccessIterator __first1, _RandomAccessIterator __last1, _Tp* __first2)
+            { return __internal::__brick_uninitialized_move(__first1, __last1, __first2, _IsVector{}); };
+
+            // Pass 1: merge both halves into the buffer.
+            __par_backend::__parallel_merge(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __middle, __middle, __last, __r,
+                __comp,
+                [__n, __move_values, __move_sequences](_RandomAccessIterator __f1, _RandomAccessIterator __l1,
+                                                       _RandomAccessIterator __f2, _RandomAccessIterator __l2,
+                                                       _Tp* __f3, _Compare __comp)
+                {
+                    (__utils::__serial_move_merge(__n))(__f1, __l1, __f2, __l2, __f3, __comp, __move_values,
+                                                        __move_values, __move_sequences, __move_sequences);
+                    return __f3 + (__l1 - __f1) + (__l2 - __f2);
+                });
+
+            // Pass 2: transfer buffer chunk [__i, __j) back to the matching offset in the input range.
+            __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __r, __r + __n,
+                                          [__r, __first](_Tp* __i, _Tp* __j)
+                                          { __brick_move_destroy()(__i, __j, __first + (__i - __r), _IsVector{}); });
+        });
+}
+
+//------------------------------------------------------------------------
+// includes
+//------------------------------------------------------------------------
+
+// Serial includes pattern: the tag and execution policy arguments are ignored and the query is
+// answered by std::includes.
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2,
+          typename _Compare>
+bool
+__pattern_includes(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept
+{
+    const bool __is_included = std::includes(__first1, __last1, __first2, __last2, __comp);
+    return __is_included;
+}
+
+// Parallel includes pattern. After a few serial shortcuts (empty second range, ranges that cannot
+// overlap, single-element second range) the second range is split into chunks by
+// __internal::__parallel_or; each chunk reports true when it is NOT included in the first range,
+// so the overall answer is the negation of the parallel "or".
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+bool
+__pattern_includes(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                   _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                   _Compare __comp)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    // An empty second range is always included.
+    if (__first2 >= __last2)
+        return true;
+
+    // Empty first range, or the second range starts before / ends after the first one.
+    if (__first1 >= __last1 || __comp(*__first2, *__first1) || __comp(*(__last1 - 1), *(__last2 - 1)))
+        return false;
+
+    // Skip the part of the first range that precedes the first element of the second range.
+    __first1 = std::lower_bound(__first1, __last1, *__first2, __comp);
+    if (__first1 == __last1)
+        return false;
+
+    // Single-element second range: it is included iff it is equivalent to *__first1.
+    if (__last2 - __first2 == 1)
+        return !__comp(*__first1, *__first2) && !__comp(*__first2, *__first1);
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return !__internal::__parallel_or(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first2, __last2,
+                [__first1, __last1, __first2, __last2, &__comp](_RandomAccessIterator2 __i, _RandomAccessIterator2 __j)
+                {
+                    _PSTL_ASSERT(__j > __i);
+                    //_PSTL_ASSERT(__j - __i > 1);
+
+                    // 1. Move the chunk boundaries so that a run of equivalent elements is never
+                    //    split between two chunks.
+                    auto __equivalent = [&__comp](_RandomAccessIterator2 __a, _RandomAccessIterator2 __b) -> bool
+                    { return !__comp(*__a, *__b) && !__comp(*__b, *__a); };
+
+                    // 1.1 Left bound, case "aaa[aaaxyz...]" - search for "x".
+                    if (__i > __first2 && __equivalent(__i, __i - 1))
+                    {
+                        // The whole chunk continues the run of equivalent elements - nothing to check here.
+                        if (__equivalent(__i, __j - 1))
+                            return false;
+
+                        __i = std::upper_bound(__i, __last2, *__i, __comp);
+                    }
+
+                    // 1.2 Right bound, case "[...aaa]aaaxyz" - search for "x".
+                    if (__j < __last2 && __equivalent(__j - 1, __j))
+                        __j = std::upper_bound(__j, __last2, *__j, __comp);
+
+                    // 2. Test whether the chunk [__i, __j) of the second range is included in the
+                    //    first range, starting from the first candidate position.
+                    auto __start1 = std::lower_bound(__first1, __last1, *__i, __comp);
+
+                    _PSTL_ASSERT(!__comp(*(__last1 - 1), *__start1));
+                    _PSTL_ASSERT(!__comp(*(__j - 1), *__i));
+                    return !std::includes(__start1, __last1, __i, __j, __comp);
+                });
+        });
+}
+
+// Size threshold used by the parallel set algorithms below: the code compares input or prefix
+// lengths against it to decide between the serial algorithm and additional parallel work.
+constexpr int __set_algo_cut_off = 1000;
+
+// Shared parallel driver for the set algorithms.
+//
+// The first range is processed in chunks by __par_backend::__parallel_strict_scan. For each chunk
+// the "reduce" step finds the corresponding part of the second range, runs __set_op on the two
+// sub-ranges and writes the output into a temporary buffer; the "scan" step then transfers every
+// non-empty buffered piece to its final position in __result with __brick_move_destroy.
+//
+//   __size_func(__n, __m)  used both to size the temporary buffer (called with the two input
+//                          lengths) and to compute the buffer offset of a chunk (called with the
+//                          chunk offsets in the first and second range)
+//   __set_op               set operation applied to a pair of sub-ranges; writes into the
+//                          buffer and returns the end of what it wrote
+//
+// Returns the end of the output written to __result.
+template <class _IsVector, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2,
+          class _OutputIterator, class _Compare, class _SizeFunction, class _SetOP>
+_OutputIterator
+__parallel_set_op(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1,
+                  _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2,
+                  _OutputIterator __result, _Compare __comp, _SizeFunction __size_func, _SetOP __set_op)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
+    typedef typename std::iterator_traits<_OutputIterator>::value_type _Tp;
+
+    // Per-chunk bookkeeping carried through the scan:
+    //   __pos      offset in __result where the chunk's output goes
+    //   __len      number of elements the chunk produced in the buffer
+    //   __buf_pos  offset of the chunk's output inside the temporary buffer
+    struct _SetRange
+    {
+        _DifferenceType __pos, __len, __buf_pos;
+        bool
+        empty() const
+        {
+            return __len == 0;
+        }
+    };
+
+    const _DifferenceType __n1 = __last1 - __first1;
+    const _DifferenceType __n2 = __last2 - __first2;
+
+    __par_backend::__buffer<_Tp> __buf(__size_func(__n1, __n2));
+
+    return __internal::__except_handler(
+        [&__exec, __n1, __first1, __last1, __first2, __last2, __result, __comp, __size_func, __set_op, &__buf]()
+        {
+            auto __buffer = __buf.get();
+            _DifferenceType __m{};
+            auto __scan = [=](_DifferenceType, _DifferenceType, const _SetRange& __s) { // Scan
+                if (!__s.empty())
+                    __brick_move_destroy()(__buffer + __s.__buf_pos, __buffer + (__s.__buf_pos + __s.__len),
+                                           __result + __s.__pos, _IsVector{});
+            };
+            __par_backend::__parallel_strict_scan(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n1, _SetRange{0, 0, 0}, //-1, 0},
+                [=](_DifferenceType __i, _DifferenceType __len) {                                  // Reduce
+                    //[__b; __e) - a subrange of the first sequence, to reduce
+                    _ForwardIterator1 __b = __first1 + __i, __e = __first1 + (__i + __len);
+
+                    //try searching for the first element which not equal to *__b
+                    if (__b != __first1)
+                        __b = std::upper_bound(__b, __last1, *__b, __comp);
+
+                    //try searching for the first element which not equal to *__e
+                    if (__e != __last1)
+                        __e = std::upper_bound(__e, __last1, *__e, __comp);
+
+                    //check is [__b; __e) empty
+                    if (__e - __b < 1)
+                    {
+                        _ForwardIterator2 __bb = __last2;
+                        if (__b != __last1)
+                            __bb = std::lower_bound(__first2, __last2, *__b, __comp);
+
+                        const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2));
+                        return _SetRange{0, 0, __buf_pos};
+                    }
+
+                    //try searching for "corresponding" subrange [__bb; __ee) in the second sequence
+                    _ForwardIterator2 __bb = __first2;
+                    if (__b != __first1)
+                        __bb = std::lower_bound(__first2, __last2, *__b, __comp);
+
+                    _ForwardIterator2 __ee = __last2;
+                    if (__e != __last1)
+                        __ee = std::lower_bound(__bb, __last2, *__e, __comp);
+
+                    const _DifferenceType __buf_pos = __size_func((__b - __first1), (__bb - __first2));
+                    auto __buffer_b = __buffer + __buf_pos;
+                    auto __res = __set_op(__b, __e, __bb, __ee, __buffer_b, __comp);
+
+                    return _SetRange{0, __res - __buffer_b, __buf_pos};
+                },
+                [](const _SetRange& __a, const _SetRange& __b) { // Combine
+                    // The result keeps the length and buffer position of whichever operand lies
+                    // further in the buffer; its output position is the sum of everything before it.
+                    if (__b.__buf_pos > __a.__buf_pos || ((__b.__buf_pos == __a.__buf_pos) && !__b.empty()))
+                        return _SetRange{__a.__pos + __a.__len + __b.__pos, __b.__len, __b.__buf_pos};
+                    return _SetRange{__b.__pos + __b.__len + __a.__pos, __a.__len, __a.__buf_pos};
+                },
+                __scan,                                     // Scan
+                [&__m, &__scan](const _SetRange& __total) { // Apex
+                    //final scan
+                    __scan(0, 0, __total);
+                    // total number of elements written to __result
+                    __m = __total.__pos + __total.__len;
+                });
+            return __result + __m;
+        });
+}
+
+// A shared parallel pattern for '__pattern_set_union' and '__pattern_set_symmetric_difference'.
+// Parallel driver for union-like set operations.
+//
+// Handles the cheap cases first (one input empty, or the two inputs do not overlap at all) by
+// plain parallel copying. Otherwise, when one input has a long prefix that precedes the whole
+// other input (longer than __set_algo_cut_off), that prefix is copied in parallel with the
+// remaining work; the remaining work is done by __parallel_set_op with __set_union_op as the
+// per-chunk operation and "__n + __m" as the buffer size function.
+//
+// Returns the end of the output written to __result.
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare, class _SetUnionOp>
+_OutputIterator
+__parallel_set_union_op(_Tag __tag, _ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
+                        _Compare __comp, _SetUnionOp __set_union_op)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferenceType;
+
+    const auto __n1 = __last1 - __first1;
+    const auto __n2 = __last2 - __first2;
+
+    // Copy bricks for a sub-range of the first / second input.
+    auto __copy_range1 = [](_ForwardIterator1 __begin, _ForwardIterator1 __end, _OutputIterator __res)
+    { return __internal::__brick_copy(__begin, __end, __res, typename _Tag::__is_vector{}); };
+    auto __copy_range2 = [](_ForwardIterator2 __begin, _ForwardIterator2 __end, _OutputIterator __res)
+    { return __internal::__brick_copy(__begin, __end, __res, typename _Tag::__is_vector{}); };
+
+    // {1} {}: parallel copying of just the first sequence
+    if (__n2 == 0)
+        return __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                                 __result, __copy_range1);
+
+    // {} {2}: parallel copying of just the second sequence
+    if (__n1 == 0)
+        return __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2,
+                                                 __result, __copy_range2);
+
+    // testing whether the sequences intersect: first position in seq1 not less than *__first2
+    _ForwardIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp);
+
+    if (__left_bound_seq_1 == __last1)
+    {
+        //{1} < {2}: seq2 is wholly greater than seq1, so copy seq1 and then seq2, in parallel
+        __par_backend::__parallel_invoke(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
+            [=]
+            {
+                __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                                  __result, __copy_range1);
+            },
+            [=]
+            {
+                __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2,
+                                                  __result + __n1, __copy_range2);
+            });
+        return __result + __n1 + __n2;
+    }
+
+    // same test in the other direction: first position in seq2 not less than *__first1
+    _ForwardIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp);
+
+    if (__left_bound_seq_2 == __last2)
+    {
+        //{2} < {1}: seq1 is wholly greater than seq2, so copy seq2 and then seq1, in parallel
+        __par_backend::__parallel_invoke(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
+            [=]
+            {
+                __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2, __last2,
+                                                  __result, __copy_range2);
+            },
+            [=]
+            {
+                __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                                  __result + __n2, __copy_range1);
+            });
+        return __result + __n1 + __n2;
+    }
+
+    const auto __m1 = __left_bound_seq_1 - __first1;
+    if (__m1 > __set_algo_cut_off)
+    {
+        auto __res_or = __result;
+        __result += __m1; //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2)
+        __par_backend::__parallel_invoke(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
+            //do parallel copying of [first1; left_bound_seq_1)
+            [=]
+            {
+                __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                                  __left_bound_seq_1, __res_or, __copy_range1);
+            },
+            // set operation on the rest of seq1 and the whole of seq2; updates __result in place
+            [=, &__result]
+            {
+                __result = __internal::__parallel_set_op(
+                    __tag, std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2,
+                    __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; },
+                    __set_union_op);
+            });
+        return __result;
+    }
+
+    const auto __m2 = __left_bound_seq_2 - __first2;
+    _PSTL_ASSERT(__m1 == 0 || __m2 == 0);
+    if (__m2 > __set_algo_cut_off)
+    {
+        auto __res_or = __result;
+        __result += __m2; //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1)
+        __par_backend::__parallel_invoke(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec),
+            //do parallel copying of [first2; left_bound_seq_2)
+            [=]
+            {
+                __internal::__pattern_walk2_brick(__tag, std::forward<_ExecutionPolicy>(__exec), __first2,
+                                                  __left_bound_seq_2, __res_or, __copy_range2);
+            },
+            // set operation on the whole of seq1 and the rest of seq2; updates __result in place
+            [=, &__result]
+            {
+                __result = __internal::__parallel_set_op(
+                    __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2,
+                    __result, __comp, [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; },
+                    __set_union_op);
+            });
+        return __result;
+    }
+
+    // general case: no long non-overlapping prefix, run the set operation on both whole inputs
+    return __internal::__parallel_set_op(
+        __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
+        [](_DifferenceType __n, _DifferenceType __m) { return __n + __m; }, __set_union_op);
+}
+
+//------------------------------------------------------------------------
+// set_union
+//------------------------------------------------------------------------
+
+// Serial brick for set_union (tag std::false_type): forwards to std::set_union and returns the
+// end of the written output.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare>
+_OutputIterator
+__brick_set_union(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                  _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                  std::false_type /* __is_vector */) noexcept
+{
+    return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+// Function object that copies [__first, __last) to __result through __brick_uninitialized_copy,
+// using _IsVector to select the brick overload. Returns whatever that brick returns.
+template <class _IsVector>
+struct __BrickCopyConstruct
+{
+    template <class _ForwardIterator, class _OutputIterator>
+    _OutputIterator
+    operator()(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result)
+    {
+        return __brick_uninitialized_copy(__first, __last, __result, _IsVector{});
+    }
+};
+
+// Vectorized brick for set_union (tag std::true_type). No vectorized implementation exists yet: a
+// build-time message is emitted and the call is redirected to std::set_union.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _OutputIterator,
+          typename _Compare>
+_OutputIterator
+__brick_set_union(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+                  _RandomAccessIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                  std::true_type /* __is_vector */) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+// Serial set_union pattern: the execution policy argument is ignored; the __brick_set_union
+// overload is chosen by _Tag::__is_vector and its result is returned unchanged.
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator1, typename _ForwardIterator2,
+          typename _OutputIterator, typename _Compare>
+_OutputIterator
+__pattern_set_union(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                    _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
+                    _Compare __comp) noexcept
+{
+    using __is_vector_t = typename _Tag::__is_vector;
+    return __internal::__brick_set_union(__first1, __last1, __first2, __last2, __result, __comp, __is_vector_t{});
+}
+
+// Parallel set_union pattern. Inputs whose combined length does not exceed __set_algo_cut_off are
+// handled by std::set_union; larger inputs go through __parallel_set_union_op, with
+// __set_union_construct (using __BrickCopyConstruct) as the per-chunk operation that writes into
+// the temporary buffer.
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _OutputIterator, class _Compare>
+_OutputIterator
+__pattern_set_union(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                    _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                    _OutputIterator __result, _Compare __comp)
+{
+    using _Tp = typename std::iterator_traits<_OutputIterator>::value_type;
+
+    const auto __n1 = __last1 - __first1;
+    const auto __n2 = __last2 - __first2;
+
+    // use serial algorithm
+    if (__n1 + __n2 <= __set_algo_cut_off)
+        return std::set_union(__first1, __last1, __first2, __last2, __result, __comp);
+
+    // Per-chunk union that constructs its output in raw buffer storage.
+    auto __union_into_buffer = [](_RandomAccessIterator1 __f1, _RandomAccessIterator1 __l1,
+                                  _RandomAccessIterator2 __f2, _RandomAccessIterator2 __l2, _Tp* __out,
+                                  _Compare __cmp)
+    {
+        return __pstl::__utils::__set_union_construct(__f1, __l1, __f2, __l2, __out, __cmp,
+                                                      __BrickCopyConstruct<_IsVector>());
+    };
+
+    return __parallel_set_union_op(__tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2,
+                                   __last2, __result, __comp, __union_into_buffer);
+}
+
+//------------------------------------------------------------------------
+// set_intersection
+//------------------------------------------------------------------------
+
+// Serial brick for set_intersection (tag std::false_type): forwards to std::set_intersection and
+// returns the end of the written output.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare>
+_OutputIterator
+__brick_set_intersection(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                         _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                         std::false_type /* __is_vector */) noexcept
+{
+    return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+// Vectorized brick for set_intersection (tag std::true_type). No vectorized implementation exists
+// yet: a build-time message is emitted and the call is redirected to std::set_intersection.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
+          typename _Compare>
+_RandomAccessIterator3
+__brick_set_intersection(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
+                         _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                         _RandomAccessIterator3 __result, _Compare __comp,
+                         std::true_type /* __is_vector */) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::set_intersection(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_intersection(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                           _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
+                           _Compare __comp) noexcept
+{
+    return __internal::__brick_set_intersection(__first1, __last1, __first2, __last2, __result, __comp,
+                                                typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _Compare>
+_RandomAccessIterator3
+__pattern_set_intersection(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                           _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+                           _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator3>::value_type _Tp;
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
+
+    const auto __n1 = __last1 - __first1;
+    const auto __n2 = __last2 - __first2;
+
+    // intersection is empty
+    if (__n1 == 0 || __n2 == 0)
+        return __result;
+
+    // testing  whether the sequences are intersected
+    _RandomAccessIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp);
+    //{1} < {2}: seq 2 is wholly greater than seq 1, so, the intersection is empty
+    if (__left_bound_seq_1 == __last1)
+        return __result;
+
+    // testing  whether the sequences are intersected
+    _RandomAccessIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp);
+    //{2} < {1}: seq 1 is wholly greater than seq 2, so, the intersection is empty
+    if (__left_bound_seq_2 == __last2)
+        return __result;
+
+    const auto __m1 = __last1 - __left_bound_seq_1 + __n2;
+    if (__m1 > __set_algo_cut_off)
+    {
+        //we know proper offset due to [first1; left_bound_seq_1) < [first2; last2)
+        return __internal::__parallel_set_op(
+            __tag, std::forward<_ExecutionPolicy>(__exec), __left_bound_seq_1, __last1, __first2, __last2, __result,
+            __comp, [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); },
+            [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+               _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) {
+                return __pstl::__utils::__set_intersection_construct(__first1, __last1, __first2, __last2, __result,
+                                                                     __comp);
+            });
+    }
+
+    const auto __m2 = __last2 - __left_bound_seq_2 + __n1;
+    if (__m2 > __set_algo_cut_off)
+    {
+        //we know proper offset due to [first2; left_bound_seq_2) < [first1; last1)
+        __result = __internal::__parallel_set_op(
+            __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __left_bound_seq_2, __last2, __result,
+            __comp, [](_DifferenceType __n, _DifferenceType __m) { return std::min(__n, __m); },
+            [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+               _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp) {
+                return __pstl::__utils::__set_intersection_construct(__first2, __last2, __first1, __last1, __result,
+                                                                     __comp);
+            });
+        return __result;
+    }
+
+    // [left_bound_seq_1; last1) and [left_bound_seq_2; last2) - use serial algorithm
+    return std::set_intersection(__left_bound_seq_1, __last1, __left_bound_seq_2, __last2, __result, __comp);
+}
+
+//------------------------------------------------------------------------
+// set_difference
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator
+__brick_set_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                       _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                       /*__is_vector=*/std::false_type) noexcept
+{
+    return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3, class _Compare>
+_RandomAccessIterator3
+__brick_set_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+                       _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp,
+                       /*__is_vector=*/std::true_type) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
+                         _Compare __comp) noexcept
+{
+    return __internal::__brick_set_difference(__first1, __last1, __first2, __last2, __result, __comp,
+                                              typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _Compare>
+_RandomAccessIterator3
+__pattern_set_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                         _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+                         _RandomAccessIterator2 __last2, _RandomAccessIterator3 __result, _Compare __comp)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator3>::value_type _Tp;
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
+
+    const auto __n1 = __last1 - __first1;
+    const auto __n2 = __last2 - __first2;
+
+    // {} \ {2}: the difference is empty
+    if (__n1 == 0)
+        return __result;
+
+    // {1} \ {}: parallel copying just first sequence
+    if (__n2 == 0)
+        return __internal::__pattern_walk2_brick(
+            __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result,
+            [](_RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _RandomAccessIterator3 __res)
+            { return __internal::__brick_copy(__begin, __end, __res, _IsVector{}); });
+
+    // testing  whether the sequences are intersected
+    _RandomAccessIterator1 __left_bound_seq_1 = std::lower_bound(__first1, __last1, *__first2, __comp);
+    //{1} < {2}: seq 2 is wholly greater than seq 1, so, parallel copying just first sequence
+    if (__left_bound_seq_1 == __last1)
+        return __internal::__pattern_walk2_brick(
+            __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result,
+            [](_RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _RandomAccessIterator3 __res)
+            { return __internal::__brick_copy(__begin, __end, __res, _IsVector{}); });
+
+    // testing  whether the sequences are intersected
+    _RandomAccessIterator2 __left_bound_seq_2 = std::lower_bound(__first2, __last2, *__first1, __comp);
+    //{2} < {1}: seq 1 is wholly greater than seq 2, so, parallel copying just first sequence
+    if (__left_bound_seq_2 == __last2)
+        return __internal::__pattern_walk2_brick(
+            __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __result,
+            [](_RandomAccessIterator1 __begin, _RandomAccessIterator1 __end, _RandomAccessIterator3 __res)
+            { return __internal::__brick_copy(__begin, __end, __res, _IsVector{}); });
+
+    if (__n1 + __n2 > __set_algo_cut_off)
+        return __parallel_set_op(
+            __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
+            [](_DifferenceType __n, _DifferenceType) { return __n; },
+            [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+               _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp)
+            {
+                return __pstl::__utils::__set_difference_construct(__first1, __last1, __first2, __last2, __result,
+                                                                   __comp, __BrickCopyConstruct<_IsVector>());
+            });
+
+    // use serial algorithm
+    return std::set_difference(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+//------------------------------------------------------------------------
+// set_symmetric_difference
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator, class _Compare>
+_OutputIterator
+__brick_set_symmetric_difference(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                                 _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                                 /*__is_vector=*/std::false_type) noexcept
+{
+    return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3, class _Compare>
+_RandomAccessIterator3
+__brick_set_symmetric_difference(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
+                                 _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                                 _RandomAccessIterator3 __result, _Compare __comp,
+                                 /*__is_vector=*/std::true_type) noexcept
+{
+    _PSTL_PRAGMA_MESSAGE("Vectorized algorithm unimplemented, redirected to serial");
+    return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _OutputIterator,
+          class _Compare>
+_OutputIterator
+__pattern_set_symmetric_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _OutputIterator __result,
+                                   _Compare __comp) noexcept
+{
+    return __internal::__brick_set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp,
+                                                        typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _RandomAccessIterator3, class _Compare>
+_RandomAccessIterator3
+__pattern_set_symmetric_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
+                                   _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
+                                   _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                                   _RandomAccessIterator3 __result, _Compare __comp)
+{
+
+    const auto __n1 = __last1 - __first1;
+    const auto __n2 = __last2 - __first2;
+
+    // use serial algorithm
+    if (__n1 + __n2 <= __set_algo_cut_off)
+        return std::set_symmetric_difference(__first1, __last1, __first2, __last2, __result, __comp);
+
+    typedef typename std::iterator_traits<_RandomAccessIterator3>::value_type _Tp;
+    return __internal::__parallel_set_union_op(
+        __tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp,
+        [](_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+           _RandomAccessIterator2 __last2, _Tp* __result, _Compare __comp)
+        {
+            return __pstl::__utils::__set_symmetric_difference_construct(__first1, __last1, __first2, __last2, __result,
+                                                                         __comp, __BrickCopyConstruct<_IsVector>());
+        });
+}
+
+//------------------------------------------------------------------------
+// is_heap_until
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+                      /* __is_vector = */ std::false_type) noexcept
+{
+    return std::is_heap_until(__first, __last, __comp);
+}
+
+template <class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__brick_is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+                      /* __is_vector = */ std::true_type) noexcept
+{
+    if (__last - __first < 2)
+        return __last;
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _SizeType;
+    return __unseq_backend::__simd_first(
+        __first, _SizeType(0), __last - __first,
+        [&__comp](_RandomAccessIterator __it, _SizeType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__pattern_is_heap_until(_Tag, _ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last,
+                        _Compare __comp) noexcept
+{
+    return __internal::__brick_is_heap_until(__first, __last, __comp, typename _Tag::__is_vector{});
+}
+
+template <class _RandomAccessIterator, class _DifferenceType, class _Compare>
+_RandomAccessIterator
+__is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp,
+                      /* __is_vector = */ std::false_type) noexcept
+{
+    _DifferenceType __i = __begin;
+    for (; __i < __end; ++__i)
+    {
+        if (__comp(__first[(__i - 1) / 2], __first[__i]))
+        {
+            break;
+        }
+    }
+    return __first + __i;
+}
+
+template <class _RandomAccessIterator, class _DifferenceType, class _Compare>
+_RandomAccessIterator
+__is_heap_until_local(_RandomAccessIterator __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp,
+                      /* __is_vector = */ std::true_type) noexcept
+{
+    return __unseq_backend::__simd_first(
+        __first, __begin, __end,
+        [&__comp](_RandomAccessIterator __it, _DifferenceType __i) { return __comp(__it[(__i - 1) / 2], __it[__i]); });
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+_RandomAccessIterator
+__pattern_is_heap_until(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                        _RandomAccessIterator __last, _Compare __comp) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (__last - __first < 2)
+        return __last;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __parallel_find(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [__first, __comp](_RandomAccessIterator __i, _RandomAccessIterator __j) {
+                    return __internal::__is_heap_until_local(__first, __i - __first, __j - __first, __comp,
+                                                             _IsVector{});
+                },
+                std::less<typename std::iterator_traits<_RandomAccessIterator>::difference_type>(), /*is_first=*/true);
+        });
+}
+
+//------------------------------------------------------------------------
+// min_element
+//------------------------------------------------------------------------
+
+template <typename _ForwardIterator, typename _Compare>
+_ForwardIterator
+__brick_min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
+                    /* __is_vector = */ std::false_type) noexcept
+{
+    return std::min_element(__first, __last, __comp);
+}
+
+template <typename _RandomAccessIterator, typename _Compare>
+_RandomAccessIterator
+__brick_min_element(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+                    /* __is_vector = */ std::true_type) noexcept
+{
+#if defined(_PSTL_UDR_PRESENT)
+    return __unseq_backend::__simd_min_element(__first, __last - __first, __comp);
+#else
+    return std::min_element(__first, __last, __comp);
+#endif
+}
+
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare>
+_ForwardIterator
+__pattern_min_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                      _Compare __comp) noexcept
+{
+    return __internal::__brick_min_element(__first, __last, __comp, typename _Tag::__is_vector{});
+}
+
+template <typename _IsVector, typename _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare>
+_RandomAccessIterator
+__pattern_min_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                      _RandomAccessIterator __last, _Compare __comp)
+{
+    if (__first == __last)
+        return __last;
+
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __par_backend::__parallel_reduce(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first + 1, __last, __first,
+                [=](_RandomAccessIterator __begin, _RandomAccessIterator __end,
+                    _RandomAccessIterator __init) -> _RandomAccessIterator
+                {
+                    const _RandomAccessIterator subresult =
+                        __internal::__brick_min_element(__begin, __end, __comp, _IsVector{});
+                    return __internal::__cmp_iterators_by_values(__init, subresult, __comp);
+                },
+                [=](_RandomAccessIterator __it1, _RandomAccessIterator __it2) -> _RandomAccessIterator
+                { return __internal::__cmp_iterators_by_values(__it1, __it2, __comp); });
+        });
+}
+
+//------------------------------------------------------------------------
+// minmax_element
+//------------------------------------------------------------------------
+
+template <typename _ForwardIterator, typename _Compare>
+std::pair<_ForwardIterator, _ForwardIterator>
+__brick_minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp,
+                       /* __is_vector = */ std::false_type) noexcept
+{
+    return std::minmax_element(__first, __last, __comp);
+}
+
+template <typename _RandomAccessIterator, typename _Compare>
+std::pair<_RandomAccessIterator, _RandomAccessIterator>
+__brick_minmax_element(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp,
+                       /* __is_vector = */ std::true_type) noexcept
+{
+#if defined(_PSTL_UDR_PRESENT)
+    return __unseq_backend::__simd_minmax_element(__first, __last - __first, __comp);
+#else
+    return std::minmax_element(__first, __last, __comp);
+#endif
+}
+
+template <typename _Tag, typename _ExecutionPolicy, typename _ForwardIterator, typename _Compare>
+std::pair<_ForwardIterator, _ForwardIterator>
+__pattern_minmax_element(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                         _Compare __comp) noexcept
+{
+    return __internal::__brick_minmax_element(__first, __last, __comp, typename _Tag::__is_vector{});
+}
+
+template <typename _IsVector, typename _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare>
+std::pair<_RandomAccessIterator, _RandomAccessIterator>
+__pattern_minmax_element(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                         _RandomAccessIterator __last, _Compare __comp)
+{
+    if (__first == __last)
+        return std::make_pair(__first, __first);
+
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler([&]() {
+        typedef std::pair<_RandomAccessIterator, _RandomAccessIterator> _Result;
+
+        return __par_backend::__parallel_reduce(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first + 1, __last,
+            std::make_pair(__first, __first),
+            [=](_RandomAccessIterator __begin, _RandomAccessIterator __end, _Result __init) -> _Result
+            {
+                const _Result __subresult = __internal::__brick_minmax_element(__begin, __end, __comp, _IsVector{});
+                return std::make_pair(
+                    __internal::__cmp_iterators_by_values(__subresult.first, __init.first, __comp),
+                    __internal::__cmp_iterators_by_values(__init.second, __subresult.second, std::not_fn(__comp)));
+            },
+            [=](_Result __p1, _Result __p2) -> _Result
+            {
+                return std::make_pair(
+                    __internal::__cmp_iterators_by_values(__p1.first, __p2.first, __comp),
+                    __internal::__cmp_iterators_by_values(__p2.second, __p1.second, std::not_fn(__comp)));
+            });
+    });
+}
+
+//------------------------------------------------------------------------
+// mismatch
+//------------------------------------------------------------------------
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+std::pair<_ForwardIterator1, _ForwardIterator2>
+__mismatch_serial(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                  _ForwardIterator2 __last2, _BinaryPredicate __pred)
+{
+#if defined(_PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT)
+    return std::mismatch(__first1, __last1, __first2, __last2, __pred);
+#else
+    for (; __first1 != __last1 && __first2 != __last2 && __pred(*__first1, *__first2); ++__first1, ++__first2)
+    {
+    }
+    return std::make_pair(__first1, __first2);
+#endif
+}
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+std::pair<_ForwardIterator1, _ForwardIterator2>
+__brick_mismatch(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                 _ForwardIterator2 __last2, _Predicate __pred, /* __is_vector = */ std::false_type) noexcept
+{
+    return __mismatch_serial(__first1, __last1, __first2, __last2, __pred);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Predicate>
+std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
+__brick_mismatch(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2,
+                 _RandomAccessIterator2 __last2, _Predicate __pred, /* __is_vector = */ std::true_type) noexcept
+{
+    auto __n = std::min(__last1 - __first1, __last2 - __first2);
+    return __unseq_backend::__simd_first(__first1, __n, __first2, std::not_fn(__pred));
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+std::pair<_ForwardIterator1, _ForwardIterator2>
+__pattern_mismatch(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                   _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Predicate __pred) noexcept
+{
+    return __internal::__brick_mismatch(__first1, __last1, __first2, __last2, __pred, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Predicate>
+std::pair<_RandomAccessIterator1, _RandomAccessIterator2>
+__pattern_mismatch(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                   _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                   _Predicate __pred) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler([&]() {
+        auto __n = std::min(__last1 - __first1, __last2 - __first2);
+        auto __result = __internal::__parallel_find(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
+            [__first1, __first2, __pred](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+            {
+                return __internal::__brick_mismatch(__i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1),
+                                                    __pred, _IsVector{})
+                    .first;
+            },
+            std::less<typename std::iterator_traits<_RandomAccessIterator1>::difference_type>(), /*is_first=*/true);
+        return std::make_pair(__result, __first2 + (__result - __first1));
+    });
+}
+
+//------------------------------------------------------------------------
+// lexicographical_compare
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+bool
+__brick_lexicographical_compare(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                                _ForwardIterator2 __last2, _Compare __comp,
+                                /* __is_vector = */ std::false_type) noexcept
+{
+    return std::lexicographical_compare(__first1, __last1, __first2, __last2, __comp);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Compare>
+bool
+__brick_lexicographical_compare(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
+                                _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2, _Compare __comp,
+                                /* __is_vector = */ std::true_type) noexcept
+{
+    if (__first2 == __last2)
+    { // if second sequence is empty
+        return false;
+    }
+    else if (__first1 == __last1)
+    { // if first sequence is empty
+        return true;
+    }
+    else
+    {
+        typedef typename std::iterator_traits<_RandomAccessIterator1>::reference ref_type1;
+        typedef typename std::iterator_traits<_RandomAccessIterator2>::reference ref_type2;
+        --__last1;
+        --__last2;
+        auto __n = std::min(__last1 - __first1, __last2 - __first2);
+        std::pair<_RandomAccessIterator1, _RandomAccessIterator2> __result = __unseq_backend::__simd_first(
+            __first1, __n, __first2, [__comp](const ref_type1 __x, const ref_type2 __y) mutable {
+                return __comp(__x, __y) || __comp(__y, __x);
+            });
+
+        if (__result.first == __last1 && __result.second != __last2)
+        { // if first sequence shorter than second
+            return !__comp(*__result.second, *__result.first);
+        }
+        else
+        { // if second sequence shorter than first or both have the same number of elements
+            return __comp(*__result.first, *__result.second);
+        }
+    }
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+bool
+__pattern_lexicographical_compare(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                                  _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp) noexcept
+{
+    return __internal::__brick_lexicographical_compare(__first1, __last1, __first2, __last2, __comp,
+                                                       typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Compare>
+bool
+__pattern_lexicographical_compare(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
+                                  _RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
+                                  _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                                  _Compare __comp) noexcept
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    if (__first2 == __last2)
+    { // if second sequence is empty
+        return false;
+    }
+    else if (__first1 == __last1)
+    { // if first sequence is empty
+        return true;
+    }
+    else
+    {
+        typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _RefType1;
+        typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _RefType2;
+        --__last1;
+        --__last2;
+        auto __n = std::min(__last1 - __first1, __last2 - __first2);
+        auto __result = __internal::__parallel_find(
+            __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __first1 + __n,
+            [__first1, __first2, &__comp](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j)
+            {
+                return __internal::__brick_mismatch(
+                           __i, __j, __first2 + (__i - __first1), __first2 + (__j - __first1),
+                           [&__comp](const _RefType1 __x, const _RefType2 __y)
+                           { return !__comp(__x, __y) && !__comp(__y, __x); },
+                           _IsVector{})
+                    .first;
+            },
+            std::less<typename std::iterator_traits<_RandomAccessIterator1>::difference_type>(), /*is_first=*/true);
+
+        if (__result == __last1 && __first2 + (__result - __first1) != __last2)
+        { // if first sequence shorter than second
+            return !__comp(*(__first2 + (__result - __first1)), *__result);
+        }
+        else
+        { // if second sequence shorter than first or both have the same number of elements
+            return __comp(*__result, *(__first2 + (__result - __first1)));
+        }
+    }
+}
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_ALGORITHM_IMPL_H */

diff --git a/libcxx/include/pstl/internal/execution_defs.h b/libcxx/include/pstl/internal/execution_defs.h
new file mode 100644
index 0000000000000..d7c4126972338
--- /dev/null
+++ b/libcxx/include/pstl/internal/execution_defs.h
@@ -0,0 +1,100 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_EXECUTION_POLICY_DEFS_H
+#define _PSTL_EXECUTION_POLICY_DEFS_H
+
+#include <type_traits>
+
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace execution
+{
+inline namespace v1
+{
+
+// 2.4, Sequential execution policy
+class sequenced_policy
+{
+};
+
+// 2.5, Parallel execution policy
+class parallel_policy
+{
+};
+
+// 2.6, Parallel+Vector execution policy
+class parallel_unsequenced_policy
+{
+};
+
+class unsequenced_policy
+{
+};
+
+// 2.8, Execution policy objects
+constexpr sequenced_policy seq{};
+constexpr parallel_policy par{};
+constexpr parallel_unsequenced_policy par_unseq{};
+constexpr unsequenced_policy unseq{};
+
+// 2.3, Execution policy type trait
+template <class T>
+struct is_execution_policy : std::false_type
+{
+};
+
+template <>
+struct is_execution_policy<__pstl::execution::sequenced_policy> : std::true_type
+{
+};
+template <>
+struct is_execution_policy<__pstl::execution::parallel_policy> : std::true_type
+{
+};
+template <>
+struct is_execution_policy<__pstl::execution::parallel_unsequenced_policy> : std::true_type
+{
+};
+template <>
+struct is_execution_policy<__pstl::execution::unsequenced_policy> : std::true_type
+{
+};
+
+#if defined(_PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT)
+template <class T>
+constexpr bool is_execution_policy_v = __pstl::execution::is_execution_policy<T>::value;
+#endif
+
+} // namespace v1
+} // namespace execution
+
+namespace __internal
+{
+template <class ExecPolicy, class T>
+using __enable_if_execution_policy =
+    typename std::enable_if<__pstl::execution::is_execution_policy<typename std::decay<ExecPolicy>::type>::value,
+                            T>::type;
+
+template <class _IsVector>
+struct __serial_tag;
+template <class _IsVector>
+struct __parallel_tag;
+
+} // namespace __internal
+
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_EXECUTION_POLICY_DEFS_H */

diff --git a/libcxx/include/pstl/internal/execution_impl.h b/libcxx/include/pstl/internal/execution_impl.h
new file mode 100644
index 0000000000000..5dc622b441221
--- /dev/null
+++ b/libcxx/include/pstl/internal/execution_impl.h
@@ -0,0 +1,105 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_EXECUTION_IMPL_H
+#define _PSTL_EXECUTION_IMPL_H
+
+#include <iterator>
+#include <type_traits>
+
+#include "pstl_config.h"
+#include "execution_defs.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+template <typename _IteratorTag, typename... _IteratorTypes>
+using __are_iterators_of = std::conjunction<
+    std::is_base_of<_IteratorTag, typename std::iterator_traits<std::decay_t<_IteratorTypes>>::iterator_category>...>;
+
+template <typename... _IteratorTypes>
+using __are_random_access_iterators = __are_iterators_of<std::random_access_iterator_tag, _IteratorTypes...>;
+
+struct __serial_backend_tag
+{
+};
+struct __tbb_backend_tag
+{
+};
+struct __openmp_backend_tag
+{
+};
+
+#if defined(_PSTL_PAR_BACKEND_TBB)
+using __par_backend_tag = __tbb_backend_tag;
+#elif defined(_PSTL_PAR_BACKEND_OPENMP)
+using __par_backend_tag = __openmp_backend_tag;
+#elif defined(_PSTL_PAR_BACKEND_SERIAL)
+using __par_backend_tag = __serial_backend_tag;
+#else
+#    error "A parallel backend must be specified";
+#endif
+
+template <class _IsVector>
+struct __serial_tag
+{
+    using __is_vector = _IsVector;
+};
+
+template <class _IsVector>
+struct __parallel_tag
+{
+    using __is_vector = _IsVector;
+    // backend tag can be changed depending on
+    // TBB availability in the environment
+    using __backend_tag = __par_backend_tag;
+};
+
+template <class _IsVector, class... _IteratorTypes>
+using __tag_type = typename std::conditional<__internal::__are_random_access_iterators<_IteratorTypes...>::value,
+                                             __parallel_tag<_IsVector>, __serial_tag<_IsVector>>::type;
+
+template <class... _IteratorTypes>
+__serial_tag</*_IsVector = */ std::false_type>
+__select_backend(__pstl::execution::sequenced_policy, _IteratorTypes&&...)
+{
+    return {};
+}
+
+template <class... _IteratorTypes>
+__serial_tag<__internal::__are_random_access_iterators<_IteratorTypes...>>
+__select_backend(__pstl::execution::unsequenced_policy, _IteratorTypes&&...)
+{
+    return {};
+}
+
+template <class... _IteratorTypes>
+__tag_type</*_IsVector = */ std::false_type, _IteratorTypes...>
+__select_backend(__pstl::execution::parallel_policy, _IteratorTypes&&...)
+{
+    return {};
+}
+
+template <class... _IteratorTypes>
+__tag_type<__internal::__are_random_access_iterators<_IteratorTypes...>, _IteratorTypes...>
+__select_backend(__pstl::execution::parallel_unsequenced_policy, _IteratorTypes&&...)
+{
+    return {};
+}
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_EXECUTION_IMPL_H */

diff --git a/libcxx/include/pstl/internal/glue_algorithm_defs.h b/libcxx/include/pstl/internal/glue_algorithm_defs.h
new file mode 100644
index 0000000000000..28a7f92163b1c
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_algorithm_defs.h
@@ -0,0 +1,558 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_ALGORITHM_DEFS_H
+#define _PSTL_GLUE_ALGORITHM_DEFS_H
+
+#include <functional>
+#include <iterator>
+
+#include "execution_defs.h"
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace std
+{
+
+// [alg.any_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred);
+
+// [alg.all_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+all_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred);
+
+// [alg.none_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+none_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred);
+
+// [alg.foreach]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Function>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Function>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f);
+
+// [alg.find]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+find_if_not(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value);
+
+// [alg.find.end]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+         _ForwardIterator2 __s_last, _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+         _ForwardIterator2 __s_last);
+
+// [alg.find_first_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+              _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+              _ForwardIterator2 __s_first, _ForwardIterator2 __s_last);
+
+// [alg.adjacent_find]
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred);
+
+// [alg.count]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::difference_type>
+count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::difference_type>
+count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred);
+
+// [alg.search]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+       _ForwardIterator2 __s_last, _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+       _ForwardIterator2 __s_last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+         const _Tp& __value, _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+         const _Tp& __value);
+
+// [alg.copy]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _ForwardIterator2 __result);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 result,
+        _Predicate __pred);
+
+// [alg.swap]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+            _ForwardIterator2 __first2);
+
+// [alg.transform]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+          _UnaryOperation __op);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator __result, _BinaryOperation __op);
+
+// [alg.replace]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
+           const _Tp& __new_value);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+replace(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_value,
+        const _Tp& __new_value);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryPredicate, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+replace_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+             const _Tp& __old_value, const _Tp& __new_value);
+
+// [alg.fill]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const _Tp& __value);
+
+// [alg.generate]
+template <class _ExecutionPolicy, class _ForwardIterator, class _Generator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Generator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size count, _Generator __g);
+
+// [alg.remove]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+remove_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Predicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+remove_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+            const _Tp& __value);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+remove(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value);
+
+// [alg.unique]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+            _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result);
+
+// [alg.reverse]
+
+template <class _ExecutionPolicy, class _BidirectionalIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last);
+
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
+             _ForwardIterator __d_first);
+
+// [alg.rotate]
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last,
+            _ForwardIterator2 __result);
+
+// [alg.partitions]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _BidirectionalIterator>
+stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
+                 _UnaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _ForwardIterator1, class _ForwardIterator2,
+          class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+partition_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+               _ForwardIterator1 __out_true, _ForwardIterator2 __out_false, _UnaryPredicate __pred);
+
+// [alg.sort]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last);
+
+// [stable.sort]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last);
+
+// [mismatch]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2, _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _BinaryPredicate __pred);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2);
+
+// [alg.equal]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _BinaryPredicate __p);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _BinaryPredicate __p);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2);
+
+// [alg.move]
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first);
+
+// [partial.sort]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+             _RandomAccessIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+             _RandomAccessIterator __last);
+
+// [partial.sort.copy]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+                  _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+                  _RandomAccessIterator __d_first, _RandomAccessIterator __d_last);
+
+// [is.sorted]
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+// [alg.nth.element]
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+            _RandomAccessIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+            _RandomAccessIterator __last);
+
+// [alg.merge]
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _ForwardIterator __d_first);
+
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+              _BidirectionalIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _BidirectionalIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+              _BidirectionalIterator __last);
+
+// [includes]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2);
+
+// [set.union]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator2 __last2, _ForwardIterator __result);
+
+// [set.intersection]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result);
+
+// [set.difference]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+               _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+               _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result);
+
+// [set.symmetric.difference]
+
+// Comparator overload. The output iterator parameter uses the reserved spelling `__result`, matching the
+// overload without a comparator, so that a user-defined macro named `result` cannot break this header.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result,
+                         _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result);
+
+// [is.heap]
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last);
+
+// [alg.min.max]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>>
+minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>>
+minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+// [alg.lex.comparison]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2);
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_ALGORITHM_DEFS_H */

diff --git a/libcxx/include/pstl/internal/glue_algorithm_impl.h b/libcxx/include/pstl/internal/glue_algorithm_impl.h
new file mode 100644
index 0000000000000..fb0c19d776658
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_algorithm_impl.h
@@ -0,0 +1,1108 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_ALGORITHM_IMPL_H
+#define _PSTL_GLUE_ALGORITHM_IMPL_H
+
+#include <functional>
+
+#include "pstl_config.h"
+
+#include "execution_defs.h"
+#include "utils.h"
+#include "algorithm_fwd.h"
+#include "numeric_fwd.h" /* count and count_if use __pattern_transform_reduce */
+
+#include "execution_impl.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace std
+{
+
+// [alg.any_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+any_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_any_of(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __pred);
+}
+
+// [alg.all_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Pred>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+all_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Pred __pred)
+{
+    return !std::any_of(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::not_fn(__pred));
+}
+
+// [alg.none_of]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+none_of(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
+{
+    return !std::any_of(std::forward<_ExecutionPolicy>(__exec), __first, __last, __pred);
+}
+
+// [alg.foreach]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Function>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+for_each(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Function __f)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_walk1(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __f);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Function>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+for_each_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, _Function __f)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_walk1_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                                                 __f);
+}
+
+// [alg.find]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+find_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_find_if(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                 __last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+find_if_not(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
+{
+    return std::find_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::not_fn(__pred));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    return std::find_if(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                        __pstl::__internal::__equal_value<_Tp>(__value));
+}
+
+// [alg.find.end]
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+         _ForwardIterator2 __s_last, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __s_first);
+
+    return __pstl::__internal::__pattern_find_end(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                  __last, __s_first, __s_last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_end(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+         _ForwardIterator2 __s_last)
+{
+    return std::find_end(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last,
+                         std::equal_to<>());
+}
+
+// [alg.find_first_of]
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+              _ForwardIterator2 __s_first, _ForwardIterator2 __s_last, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __s_first);
+
+    return __pstl::__internal::__pattern_find_first_of(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, __s_first, __s_last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+find_first_of(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+              _ForwardIterator2 __s_first, _ForwardIterator2 __s_last)
+{
+    return std::find_first_of(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last,
+                              std::equal_to<>());
+}
+
+// [alg.adjacent_find]
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType;
+    return __pstl::__internal::__pattern_adjacent_find(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, std::equal_to<_ValueType>(), /*first_semantic*/ false);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+adjacent_find(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_adjacent_find(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, __pred, /*first_semantic*/ false);
+}
+
+// [alg.count]
+
+// Implementation note: count and count_if call the pattern directly instead of calling std::transform_reduce
+// so that we do not have to include <numeric>.
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::difference_type>
+count(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType;
+    return __pstl::__internal::__pattern_count(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                               [&__value](const _ValueType& __x) { return __value == __x; });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::difference_type>
+count_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_count(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                               __pred);
+}
+
+// [alg.search]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+       _ForwardIterator2 __s_last, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __s_first);
+
+    return __pstl::__internal::__pattern_search(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __s_first, __s_last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator1>
+search(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+       _ForwardIterator2 __s_last)
+{
+    return std::search(std::forward<_ExecutionPolicy>(__exec), __first, __last, __s_first, __s_last, std::equal_to<>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+         const _Tp& __value, _BinaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_search_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                  __last, __count, __value, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+search_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Size __count,
+         const _Tp& __value)
+{
+    return std::search_n(std::forward<_ExecutionPolicy>(__exec), __first, __last, __count, __value,
+                         std::equal_to<typename iterator_traits<_ForwardIterator>::value_type>());
+}
+
+// [alg.copy]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    using __is_vector = typename decltype(__dispatch_tag)::__is_vector;
+
+    return __pstl::__internal::__pattern_walk2_brick(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+        [](_ForwardIterator1 __begin, _ForwardIterator1 __end, _ForwardIterator2 __res)
+        { return __pstl::__internal::__brick_copy(__begin, __end, __res, __is_vector{}); });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _Size, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy_n(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _Size __n, _ForwardIterator2 __result)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    using __is_vector = typename decltype(__dispatch_tag)::__is_vector;
+
+    return __pstl::__internal::__pattern_walk2_brick_n(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
+        [](_ForwardIterator1 __begin, _Size __sz, _ForwardIterator2 __res)
+        { return __pstl::__internal::__brick_copy_n(__begin, __sz, __res, __is_vector{}); });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+        _Predicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_copy_if(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                 __last, __result, __pred);
+}
+
+// [alg.swap]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+swap_ranges(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+            _ForwardIterator2 __first2)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _ReferenceType1;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _ReferenceType2;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    return __pstl::__internal::__pattern_walk2(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                               __last1, __first2,
+                                               [](_ReferenceType1 __x, _ReferenceType2 __y)
+                                               {
+                                                   using std::swap;
+                                                   swap(__x, __y);
+                                               });
+}
+
+// [alg.transform]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+          _UnaryOperation __op)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _InputType;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _OutputType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_walk2(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                               __result,
+                                               [__op](_InputType __x, _OutputType __y) mutable { __y = __op(__x); });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+transform(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator __result, _BinaryOperation __op)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _Input1Type;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _Input2Type;
+    typedef typename iterator_traits<_ForwardIterator>::reference _OutputType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_walk3(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __result,
+        [__op](_Input1Type x, _Input2Type y, _OutputType z) mutable { z = __op(x, y); });
+}
+
+// [alg.replace]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+replace_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred,
+           const _Tp& __new_value)
+{
+    typedef typename iterator_traits<_ForwardIterator>::reference _ElementType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_walk1(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                        [&__pred, &__new_value](_ElementType __elem)
+                                        {
+                                            if (__pred(__elem))
+                                            {
+                                                __elem = __new_value;
+                                            }
+                                        });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+replace(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __old_value,
+        const _Tp& __new_value)
+{
+    std::replace_if(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                    __pstl::__internal::__equal_value<_Tp>(__old_value), __new_value);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryPredicate, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+replace_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                _ForwardIterator2 __result, _UnaryPredicate __pred, const _Tp& __new_value)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::reference _InputType;
+    typedef typename iterator_traits<_ForwardIterator2>::reference _OutputType;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_walk2(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+        [__pred, &__new_value](_InputType __x, _OutputType __y) mutable { __y = __pred(__x) ? __new_value : __x; });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+replace_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+             const _Tp& __old_value, const _Tp& __new_value)
+{
+    return std::replace_copy_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                                __pstl::__internal::__equal_value<_Tp>(__old_value), __new_value);
+}
+
+// [alg.fill]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_fill(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                       __value);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, const _Tp& __value)
+{
+    if (__count <= 0)
+        return __first;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_fill_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                __count, __value);
+}
+
+// [alg.generate]
+template <class _ExecutionPolicy, class _ForwardIterator, class _Generator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+generate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Generator __g)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    __pstl::__internal::__pattern_generate(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                           __g);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Generator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+generate_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __count, _Generator __g)
+{
+    if (__count <= 0)
+        return __first;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_generate_n(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                    __count, __g);
+}
+
+// [alg.remove]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Predicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+remove_copy_if(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Predicate __pred)
+{
+    return std::copy_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, std::not_fn(__pred));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+remove_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+            const _Tp& __value)
+{
+    return std::copy_if(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                        __pstl::__internal::__not_equal_value<_Tp>(__value));
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+remove_if(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+
+    return __pstl::__internal::__pattern_remove_if(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                   __last, __pred);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+remove(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    return std::remove_if(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                          __pstl::__internal::__equal_value<_Tp>(__value));
+}
+
+// [alg.unique]
+
+// unique with an execution policy and a binary predicate: selects a backend tag and
+// dispatches to __pattern_unique.
+template <class _ExecutionPolicy, class _ForwardIterator, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_unique(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __pred);
+}
+
+// unique without a predicate: delegates to the predicate overload, comparing with std::equal_to<>.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+unique(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    return std::unique(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::equal_to<>{});
+}
+
+// unique_copy with an execution policy and a binary predicate: selects a backend tag from the
+// policy and both iterator types, then dispatches to __pattern_unique_copy.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result,
+            _BinaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first, __result);
+    return __pstl::__internal::__pattern_unique_copy(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __last, __result, __pred);
+}
+
+// unique_copy without a predicate: delegates to the predicate overload, comparing with
+// std::equal_to<>. The policy is perfectly forwarded (not passed as an lvalue) so its value
+// category is preserved, matching the other forwarding overloads in this file.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+unique_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __result)
+{
+    return std::unique_copy(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, std::equal_to<>());
+}
+
+// [alg.reverse]
+
+// reverse with an execution policy: selects a backend tag and dispatches to __pattern_reverse.
+// Nothing is returned.
+template <class _ExecutionPolicy, class _BidirectionalIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+reverse(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    __pstl::__internal::__pattern_reverse(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __last);
+}
+
+// reverse_copy with an execution policy: selects a backend tag from the policy and both
+// iterator types, then dispatches to __pattern_reverse_copy.
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+reverse_copy(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
+             _ForwardIterator __d_first)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+    return __pstl::__internal::__pattern_reverse_copy(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                      __last, __d_first);
+}
+
+// [alg.rotate]
+
+// rotate with an execution policy: selects a backend tag and dispatches to __pattern_rotate.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+rotate(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __middle, _ForwardIterator __last)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_rotate(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __middle,
+                                                __last);
+}
+
+// rotate_copy with an execution policy: selects a backend tag from the policy and both
+// iterator types, then dispatches to __pattern_rotate_copy.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+rotate_copy(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __middle, _ForwardIterator1 __last,
+            _ForwardIterator2 __result)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first, __result);
+    return __pstl::__internal::__pattern_rotate_copy(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __middle, __last, __result);
+}
+
+// [alg.partitions]
+
+// is_partitioned with an execution policy: selects a backend tag and returns the result of
+// __pattern_is_partitioned.
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_partitioned(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_is_partitioned(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __pred);
+}
+
+// partition with an execution policy: selects a backend tag and dispatches to __pattern_partition.
+template <class _ExecutionPolicy, class _ForwardIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+partition(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _UnaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_partition(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                   __pred);
+}
+
+// stable_partition with an execution policy: selects a backend tag and dispatches to
+// __pattern_stable_partition.
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _BidirectionalIterator>
+stable_partition(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __last,
+                 _UnaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_stable_partition(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                          __last, __pred);
+}
+
+// partition_copy with an execution policy: selects a backend tag from the policy, the input
+// iterator and both output iterators, then dispatches to __pattern_partition_copy.
+template <class _ExecutionPolicy, class _ForwardIterator, class _ForwardIterator1, class _ForwardIterator2,
+          class _UnaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+partition_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+               _ForwardIterator1 __out_true, _ForwardIterator2 __out_false, _UnaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first, __out_true, __out_false);
+    return __pstl::__internal::__pattern_partition_copy(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __out_true, __out_false, __pred);
+}
+
+// [alg.sort]
+
+// sort with an execution policy and comparator: selects a backend tag and dispatches to
+// __pattern_sort. The final argument is a tag object of type
+// std::is_move_constructible<value_type>::type.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    using _InputType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    using _IsMoveConstructible = typename std::is_move_constructible<_InputType>::type;
+
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_sort(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                              __comp, _IsMoveConstructible());
+}
+
+// sort without a comparator: delegates to the comparator overload using std::less over the
+// iterator's value_type.
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    std::sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>{});
+}
+
+// [stable.sort]
+
+// stable_sort with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_stable_sort.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_stable_sort(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __last, __comp);
+}
+
+// stable_sort without a comparator: delegates to the comparator overload using std::less over
+// the iterator's value_type. The policy is perfectly forwarded (not passed as an lvalue) so
+// its value category is preserved, matching the sibling sort overload.
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+stable_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    std::stable_sort(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+// [mismatch]
+
+// mismatch over two bounded ranges with a predicate: selects a backend tag from the policy and
+// both iterator types, then dispatches to __pattern_mismatch.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2, _BinaryPredicate __pred)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+    return __pstl::__internal::__pattern_mismatch(__backend, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                                  __first2, __last2, __pred);
+}
+
+// mismatch with only the start of the second range and a predicate: delegates to the
+// two-bounded-range overload, taking the end of the second range to be __first2 advanced by
+// the length of [__first1, __last1). The policy is perfectly forwarded (not passed as an
+// lvalue) so its value category is preserved, matching the other mismatch overloads.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _BinaryPredicate __pred)
+{
+    return std::mismatch(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2,
+                         std::next(__first2, std::distance(__first1, __last1)), __pred);
+}
+
+// mismatch over two bounded ranges without a predicate: delegates to the predicate overload,
+// comparing with std::equal_to<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2)
+{
+    return std::mismatch(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                         std::equal_to<>{});
+}
+
+// mismatch with only the start of the second range and no predicate: delegates to the
+// two-bounded-range overload, taking the end of the second range to be __first2 advanced by
+// the length of [__first1, __last1).
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator1, _ForwardIterator2>>
+mismatch(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2)
+{
+    // TODO: get rid of the call to std::distance
+    const auto __len = std::distance(__first1, __last1);
+    return std::mismatch(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2,
+                         std::next(__first2, __len));
+}
+
+// [alg.equal]
+
+// equal with only the start of the second range and a predicate: selects a backend tag from
+// the policy and both iterator types, then dispatches to the five-argument __pattern_equal.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _BinaryPredicate __p)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+    return __pstl::__internal::__pattern_equal(__backend, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                               __first2, __p);
+}
+
+// equal with only the start of the second range and no predicate: delegates to the predicate
+// overload, comparing with std::equal_to<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2)
+{
+    return std::equal(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, std::equal_to<>{});
+}
+
+// equal over two bounded ranges with a predicate: selects a backend tag from the policy and
+// both iterator types, then dispatches to the six-argument __pattern_equal.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _BinaryPredicate __p)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+    return __pstl::__internal::__pattern_equal(__backend, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                               __first2, __last2, __p);
+}
+
+// equal over two bounded ranges without a predicate: delegates to the predicate overload,
+// comparing with std::equal_to<>. The call is std::-qualified, like the sibling overloads, so
+// that argument-dependent lookup on the iterator types cannot select an unrelated `equal`.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+equal(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2)
+{
+    return std::equal(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                      std::equal_to<>());
+}
+
+// [alg.move]
+// move with an execution policy: selects a backend tag, then runs __pattern_walk2_brick with a
+// brick that calls __brick_move on each sub-range. The brick is passed a tag object of the
+// backend tag's nested __is_vector type.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+move(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __d_first)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+    using _IsVector = typename decltype(__backend)::__is_vector;
+
+    return __pstl::__internal::__pattern_walk2_brick(
+        __backend, std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first,
+        [](_ForwardIterator1 __b, _ForwardIterator1 __e, _ForwardIterator2 __out)
+        { return __pstl::__internal::__brick_move(__b, __e, __out, _IsVector{}); });
+}
+
+// [partial.sort]
+
+// partial_sort with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_partial_sort. Nothing is returned.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+             _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    __pstl::__internal::__pattern_partial_sort(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __middle,
+                                               __last, __comp);
+}
+
+// partial_sort without a comparator: delegates to the comparator overload using std::less over
+// the iterator's value_type. The policy is perfectly forwarded (not passed as an lvalue) so
+// its value category is preserved, matching the other forwarding overloads in this file.
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+partial_sort(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __middle,
+             _RandomAccessIterator __last)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _InputType;
+    std::partial_sort(std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, std::less<_InputType>());
+}
+
+// [partial.sort.copy]
+
+// partial_sort_copy with an execution policy and comparator: selects a backend tag from the
+// policy and both iterator types, then dispatches to __pattern_partial_sort_copy.
+template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+                  _RandomAccessIterator __d_first, _RandomAccessIterator __d_last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+    return __pstl::__internal::__pattern_partial_sort_copy(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                           __last, __d_first, __d_last, __comp);
+}
+
+// partial_sort_copy without a comparator: delegates to the comparator overload using
+// std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+partial_sort_copy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last,
+                  _RandomAccessIterator __d_first, _RandomAccessIterator __d_last)
+{
+    return std::partial_sort_copy(std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first, __d_last,
+                                  std::less<>{});
+}
+
+// [is.sorted]
+// is_sorted_until with an execution policy and comparator: runs __pattern_adjacent_find with
+// the comparator wrapped in __reorder_pred. If that returns __last, __last is returned;
+// otherwise the iterator following the one it returned.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    using _Reordered = __pstl::__internal::__reorder_pred<_Compare>;
+
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    const _ForwardIterator __found = __pstl::__internal::__pattern_adjacent_find(
+        __backend, std::forward<_ExecutionPolicy>(__exec), __first, __last, _Reordered(__comp),
+        /*first_semantic*/ false);
+    return __found == __last ? __last : std::next(__found);
+}
+
+// is_sorted_until without a comparator: delegates to the comparator overload using std::less
+// over the iterator's value_type. The call is std::-qualified, like the sibling overloads, so
+// that argument-dependent lookup on the iterator type cannot select an unrelated
+// `is_sorted_until`.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+is_sorted_until(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _InputType;
+    return std::is_sorted_until(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>());
+}
+
+// is_sorted with an execution policy and comparator: returns true when __pattern_adjacent_find,
+// run with the comparator wrapped in __reorder_pred, returns __last.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    using _Reordered = __pstl::__internal::__reorder_pred<_Compare>;
+
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_adjacent_find(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, _Reordered(__comp), /*or_semantic*/ true) == __last;
+}
+
+// is_sorted without a comparator: delegates to the comparator overload using std::less over
+// the iterator's value_type.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_sorted(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_ForwardIterator>::value_type;
+    return std::is_sorted(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>{});
+}
+
+// [alg.merge]
+// merge with an execution policy and comparator: selects a backend tag from the policy, both
+// input iterators and the output iterator, then dispatches to __pattern_merge.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _ForwardIterator __d_first, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2, __d_first);
+    return __pstl::__internal::__pattern_merge(__backend, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                               __first2, __last2, __d_first, __comp);
+}
+
+// merge without a comparator: delegates to the comparator overload using std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+merge(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+      _ForwardIterator2 __last2, _ForwardIterator __d_first)
+{
+    return std::merge(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __d_first,
+                      std::less<>{});
+}
+
+// inplace_merge with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_inplace_merge. Nothing is returned.
+template <class _ExecutionPolicy, class _BidirectionalIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+              _BidirectionalIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    __pstl::__internal::__pattern_inplace_merge(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __middle,
+                                                __last, __comp);
+}
+
+// inplace_merge without a comparator: delegates to the comparator overload using std::less
+// over the iterator's value_type. The policy is perfectly forwarded (not passed as an lvalue)
+// so its value category is preserved, matching the other forwarding overloads in this file.
+template <class _ExecutionPolicy, class _BidirectionalIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+inplace_merge(_ExecutionPolicy&& __exec, _BidirectionalIterator __first, _BidirectionalIterator __middle,
+              _BidirectionalIterator __last)
+{
+    typedef typename std::iterator_traits<_BidirectionalIterator>::value_type _InputType;
+    std::inplace_merge(std::forward<_ExecutionPolicy>(__exec), __first, __middle, __last, std::less<_InputType>());
+}
+
+// [includes]
+
+// includes with an execution policy and comparator: selects a backend tag from the policy and
+// both iterator types, then dispatches to __pattern_includes.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+    return __pstl::__internal::__pattern_includes(__backend, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                                                  __first2, __last2, __comp);
+}
+
+// includes without a comparator: delegates to the comparator overload using std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+includes(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+         _ForwardIterator2 __last2)
+{
+    return std::includes(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, std::less<>{});
+}
+
+// [set.union]
+
+// set_union with an execution policy and comparator: selects a backend tag from the policy,
+// both input iterators and the output iterator, then dispatches to __pattern_set_union.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+    return __pstl::__internal::__pattern_set_union(__backend, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                                   __last1, __first2, __last2, __result, __comp);
+}
+
+// set_union without a comparator: delegates to the comparator overload using std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_union(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+          _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_union(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
+                          std::less<>{});
+}
+
+// [set.intersection]
+
+// set_intersection with an execution policy and comparator: selects a backend tag from the
+// policy, both input iterators and the output iterator, then dispatches to
+// __pattern_set_intersection.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+    return __pstl::__internal::__pattern_set_intersection(__backend, std::forward<_ExecutionPolicy>(__exec), __first1,
+                                                          __last1, __first2, __last2, __result, __comp);
+}
+
+// set_intersection without a comparator: delegates to the comparator overload using
+// std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_intersection(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_intersection(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                                 __result, std::less<>{});
+}
+
+// [set.difference]
+
+// set_difference with an execution policy and comparator: selects a backend tag from the
+// policy, both input iterators and the output iterator, then dispatches to
+// __pattern_set_difference.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+               _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result, _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_set_difference(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                        __first1, __last1, __first2, __last2, __result, __comp);
+}
+
+// set_difference without a comparator: delegates to the comparator overload using
+// std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+               _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_difference(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result,
+                               std::less<>());
+}
+
+// [set.symmetric.difference]
+
+// set_symmetric_difference with an execution policy and comparator: selects a backend tag from
+// the policy, both input iterators and the output iterator, then dispatches to
+// __pattern_set_symmetric_difference.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator,
+          class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result,
+                         _Compare __comp)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2, __result);
+
+    return __pstl::__internal::__pattern_set_symmetric_difference(
+        __dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __result, __comp);
+}
+
+// set_symmetric_difference without a comparator: delegates to the comparator overload using
+// std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+set_symmetric_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                         _ForwardIterator2 __first2, _ForwardIterator2 __last2, _ForwardIterator __result)
+{
+    return std::set_symmetric_difference(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                                         __result, std::less<>());
+}
+
+// [is.heap]
+// is_heap_until with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_is_heap_until.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_is_heap_until(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                       __last, __comp);
+}
+
+// is_heap_until without a comparator: delegates to the comparator overload using std::less
+// over the iterator's value_type.
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _RandomAccessIterator>
+is_heap_until(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    return std::is_heap_until(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>{});
+}
+
+// is_heap with an execution policy and comparator: true when std::is_heap_until over the same
+// range and comparator returns __last.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp)
+{
+    const auto __heap_end = std::is_heap_until(std::forward<_ExecutionPolicy>(__exec), __first, __last, __comp);
+    return __heap_end == __last;
+}
+
+// is_heap without a comparator: delegates to the comparator overload using std::less over the
+// iterator's value_type.
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+is_heap(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    return std::is_heap(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>{});
+}
+
+// [alg.min.max]
+
+// min_element with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_min_element.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_min_element(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                     __last, __comp);
+}
+
+// min_element without a comparator: delegates to the comparator overload using std::less over
+// the iterator's value_type.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+min_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_ForwardIterator>::value_type;
+    return std::min_element(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_InputType>{});
+}
+
+// max_element with an execution policy and comparator: implemented as std::min_element with
+// the comparator wrapped in __reorder_pred. The call is std::-qualified, like the sibling
+// overload, so that argument-dependent lookup on the iterator type cannot select an unrelated
+// `min_element`.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    return std::min_element(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                            __pstl::__internal::__reorder_pred<_Compare>(__comp));
+}
+
+// max_element without a comparator: implemented as std::min_element with std::less over the
+// iterator's value_type wrapped in __reorder_pred.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+max_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_ForwardIterator>::value_type;
+    using _Less = std::less<_InputType>;
+    return std::min_element(std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                            __pstl::__internal::__reorder_pred<_Less>(_Less()));
+}
+
+// minmax_element with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_minmax_element.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>>
+minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_minmax_element(__backend, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __comp);
+}
+
+// minmax_element without a comparator: delegates to the comparator overload using std::less
+// over the iterator's value_type.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, std::pair<_ForwardIterator, _ForwardIterator>>
+minmax_element(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _ValueType = typename std::iterator_traits<_ForwardIterator>::value_type;
+    return std::minmax_element(std::forward<_ExecutionPolicy>(__exec), __first, __last, std::less<_ValueType>{});
+}
+
+// [alg.nth.element]
+
+// nth_element with an execution policy and comparator: selects a backend tag and dispatches
+// to __pattern_nth_element. Nothing is returned.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+            _RandomAccessIterator __last, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first);
+    __pstl::__internal::__pattern_nth_element(__backend, std::forward<_ExecutionPolicy>(__exec), __first, __nth,
+                                              __last, __comp);
+}
+
+// nth_element without a comparator: delegates to the comparator overload using std::less over
+// the iterator's value_type.
+template <class _ExecutionPolicy, class _RandomAccessIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+nth_element(_ExecutionPolicy&& __exec, _RandomAccessIterator __first, _RandomAccessIterator __nth,
+            _RandomAccessIterator __last)
+{
+    using _InputType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    std::nth_element(std::forward<_ExecutionPolicy>(__exec), __first, __nth, __last, std::less<_InputType>{});
+}
+
+// [alg.lex.comparison]
+
+// lexicographical_compare with an execution policy and comparator: selects a backend tag from
+// the policy and both iterator types, then dispatches to __pattern_lexicographical_compare.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Compare>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2, _Compare __comp)
+{
+    auto __backend = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+    return __pstl::__internal::__pattern_lexicographical_compare(
+        __backend, std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2, __comp);
+}
+
+// lexicographical_compare without a comparator: delegates to the comparator overload using
+// std::less<>.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, bool>
+lexicographical_compare(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                        _ForwardIterator2 __first2, _ForwardIterator2 __last2)
+{
+    return std::lexicographical_compare(std::forward<_ExecutionPolicy>(__exec), __first1, __last1, __first2, __last2,
+                                        std::less<>{});
+}
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_ALGORITHM_IMPL_H */

diff --git a/libcxx/include/pstl/internal/glue_execution_defs.h b/libcxx/include/pstl/internal/glue_execution_defs.h
new file mode 100644
index 0000000000000..df9a477f721ea
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_execution_defs.h
@@ -0,0 +1,55 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_EXECUTION_DEFS_H
+#define _PSTL_GLUE_EXECUTION_DEFS_H
+
+#include <type_traits>
+
+#include "execution_defs.h"
+#include "pstl_config.h"
+
+namespace std
+{
+// Type trait: make __pstl::execution::is_execution_policy visible as std::is_execution_policy.
+using __pstl::execution::is_execution_policy;
+#if defined(_PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT) // the _v form is only provided under this config macro
+#    if defined(__INTEL_COMPILER) // NOTE(review): defined locally instead of re-exported; reason not shown here
+template <class T>
+constexpr bool is_execution_policy_v = is_execution_policy<T>::value;
+#    else
+using __pstl::execution::is_execution_policy_v;
+#    endif
+#endif
+
+namespace execution
+{
+// Standard C++ policy classes, re-exported from __pstl::execution by using-declarations
+using __pstl::execution::parallel_policy;
+using __pstl::execution::parallel_unsequenced_policy;
+using __pstl::execution::sequenced_policy;
+
+// Standard predefined policy instances, re-exported the same way
+using __pstl::execution::par;
+using __pstl::execution::par_unseq;
+using __pstl::execution::seq;
+
+// Implementation-defined names
+// The unsequenced policy is described here as not yet standard (NOTE(review): confirm for the targeted
+// language mode); for consistency it is made available in namespace std::execution as well.
+using __pstl::execution::unseq;
+using __pstl::execution::unsequenced_policy;
+} // namespace execution
+} // namespace std
+
+#include "algorithm_impl.h"
+#include "numeric_impl.h"
+#include "parallel_backend.h"
+
+#endif /* _PSTL_GLUE_EXECUTION_DEFS_H */

diff --git a/libcxx/include/pstl/internal/glue_memory_defs.h b/libcxx/include/pstl/internal/glue_memory_defs.h
new file mode 100644
index 0000000000000..ae523339d52d6
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_memory_defs.h
@@ -0,0 +1,85 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_MEMORY_DEFS_H
+#define _PSTL_GLUE_MEMORY_DEFS_H
+
+#include "execution_defs.h"
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace std
+{
+// Declarations only: uninitialized-memory algorithm overloads taking an execution policy as first parameter.
+// [uninitialized.copy]
+// Each result is spelled __enable_if_execution_policy<Policy, R> (alias defined elsewhere; presumably SFINAE to R).
+template <class _ExecutionPolicy, class _InputIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result);
+
+template <class _ExecutionPolicy, class _InputIterator, class _Size, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result);
+
+// [uninitialized.move]
+// Same parameter shapes as the copy declarations: a range form and a (__first, __n) counted form.
+template <class _ExecutionPolicy, class _InputIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result);
+
+template <class _ExecutionPolicy, class _InputIterator, class _Size, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result);
+
+// [uninitialized.fill]
+// The range form is declared with void as its result type, the counted form with _ForwardIterator.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, const _Tp& __value);
+
+// [specialized.destroy]
+// No value parameter: only the range (or the start and a count) is passed.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n);
+
+// [uninitialized.construct.default]
+// Same signatures as the destroy declarations, under different names.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n);
+
+// [uninitialized.construct.value]
+// Same signatures again; how this differs from default construction is only visible in the definitions.
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n);
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_MEMORY_DEFS_H */

diff --git a/libcxx/include/pstl/internal/glue_memory_impl.h b/libcxx/include/pstl/internal/glue_memory_impl.h
new file mode 100644
index 0000000000000..39c595d697a28
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_memory_impl.h
@@ -0,0 +1,352 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_MEMORY_IMPL_H
+#define _PSTL_GLUE_MEMORY_IMPL_H
+
+#include "pstl_config.h"
+
+#include "execution_defs.h"
+#include "utils.h"
+#include "algorithm_fwd.h"
+
+#include "execution_impl.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace std
+{
+
+// [uninitialized.copy]
+
+template <class _ExecutionPolicy, class _InputIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_copy(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result)
+{
+    using _SrcValue = typename iterator_traits<_InputIterator>::value_type;
+    using _DstValue = typename iterator_traits<_ForwardIterator>::value_type;
+    using _SrcRef = typename iterator_traits<_InputIterator>::reference;
+    using _DstRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Selector for __invoke_if_else (presumably: first callable when true): both value types are trivial.
+    using __both_trivial =
+        std::integral_constant<bool, std::is_trivial<_SrcValue>::value && std::is_trivial<_DstValue>::value>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+    using __vec = typename decltype(__tag)::__is_vector;
+    return __pstl::__internal::__invoke_if_else(
+        __both_trivial(),
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk2_brick(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                [](_InputIterator __from, _InputIterator __to, _ForwardIterator __out)
+                { return __pstl::__internal::__brick_copy(__from, __to, __out, __vec{}); });
+        },
+        [&]()
+        {
+            // Element-wise callable: placement-new a _DstValue from the source element into the destination slot.
+            return __pstl::__internal::__pattern_walk2(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                [](_SrcRef __src, _DstRef __dst) { ::new (std::addressof(__dst)) _DstValue(__src); });
+        });
+}
+
+template <class _ExecutionPolicy, class _InputIterator, class _Size, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_copy_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result)
+{
+    using _SrcValue = typename iterator_traits<_InputIterator>::value_type;
+    using _DstValue = typename iterator_traits<_ForwardIterator>::value_type;
+    using _SrcRef = typename iterator_traits<_InputIterator>::reference;
+    using _DstRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Selector for __invoke_if_else (presumably: first callable when true): both value types are trivial.
+    using __both_trivial =
+        std::integral_constant<bool, std::is_trivial<_SrcValue>::value && std::is_trivial<_DstValue>::value>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+    using __vec = typename decltype(__tag)::__is_vector;
+    return __pstl::__internal::__invoke_if_else(
+        __both_trivial(),
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk2_brick_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
+                [](_InputIterator __from, _Size __count, _ForwardIterator __out)
+                { return __pstl::__internal::__brick_copy_n(__from, __count, __out, __vec{}); });
+        },
+        [&]()
+        {
+            // Element-wise callable: placement-new a _DstValue from the source element into the destination slot.
+            return __pstl::__internal::__pattern_walk2_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
+                [](_SrcRef __src, _DstRef __dst) { ::new (std::addressof(__dst)) _DstValue(__src); });
+        });
+}
+
+// [uninitialized.move]
+
+template <class _ExecutionPolicy, class _InputIterator, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_move(_ExecutionPolicy&& __exec, _InputIterator __first, _InputIterator __last, _ForwardIterator __result)
+{
+    using _SrcValue = typename iterator_traits<_InputIterator>::value_type;
+    using _DstValue = typename iterator_traits<_ForwardIterator>::value_type;
+    using _SrcRef = typename iterator_traits<_InputIterator>::reference;
+    using _DstRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Selector for __invoke_if_else (presumably: first callable when true): both value types are trivial.
+    using __both_trivial =
+        std::integral_constant<bool, std::is_trivial<_SrcValue>::value && std::is_trivial<_DstValue>::value>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+    using __vec = typename decltype(__tag)::__is_vector;
+    return __pstl::__internal::__invoke_if_else(
+        __both_trivial(),
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk2_brick(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                [](_InputIterator __from, _InputIterator __to, _ForwardIterator __out)
+                { return __pstl::__internal::__brick_copy(__from, __to, __out, __vec{}); });
+        },
+        [&]()
+        {
+            // Element-wise callable: placement-new a _DstValue from std::move of the source element.
+            return __pstl::__internal::__pattern_walk2(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                [](_SrcRef __src, _DstRef __dst) { ::new (std::addressof(__dst)) _DstValue(std::move(__src)); });
+        });
+}
+
+template <class _ExecutionPolicy, class _InputIterator, class _Size, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_move_n(_ExecutionPolicy&& __exec, _InputIterator __first, _Size __n, _ForwardIterator __result)
+{
+    using _SrcValue = typename iterator_traits<_InputIterator>::value_type;
+    using _DstValue = typename iterator_traits<_ForwardIterator>::value_type;
+    using _SrcRef = typename iterator_traits<_InputIterator>::reference;
+    using _DstRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Selector for __invoke_if_else (presumably: first callable when true): both value types are trivial.
+    using __both_trivial =
+        std::integral_constant<bool, std::is_trivial<_SrcValue>::value && std::is_trivial<_DstValue>::value>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+    using __vec = typename decltype(__tag)::__is_vector;
+    return __pstl::__internal::__invoke_if_else(
+        __both_trivial(),
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk2_brick_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
+                [](_InputIterator __from, _Size __count, _ForwardIterator __out)
+                { return __pstl::__internal::__brick_copy_n(__from, __count, __out, __vec{}); });
+        },
+        [&]()
+        {
+            // Element-wise callable: placement-new a _DstValue from std::move of the source element.
+            return __pstl::__internal::__pattern_walk2_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n, __result,
+                [](_SrcRef __src, _DstRef __dst) { ::new (std::addressof(__dst)) _DstValue(std::move(__src)); });
+        });
+}
+
+// [uninitialized.fill]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+uninitialized_fill(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, const _Tp& __value)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Two callables go to __invoke_if_else, keyed on whether the element type is arithmetic (presumably the
+    // first is used when it is): a __brick_fill over the range, or placement-new construction per element.
+    using __arithmetic = std::is_arithmetic<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    using __vec = typename decltype(__tag)::__is_vector;
+    __pstl::__internal::__invoke_if_else(
+        __arithmetic(),
+        [&]()
+        {
+            __pstl::__internal::__pattern_walk_brick(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [&__value](_ForwardIterator __from, _ForwardIterator __to)
+                { __pstl::__internal::__brick_fill(__from, __to, _ElemType(__value), __vec{}); });
+        },
+        [&]()
+        {
+            __pstl::__internal::__pattern_walk1(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [&__value](_ElemRef __slot) { ::new (std::addressof(__slot)) _ElemType(__value); });
+        });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_fill_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n, const _Tp& __value)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Two callables go to __invoke_if_else, keyed on whether the element type is arithmetic (presumably the
+    // first is used when it is): a __brick_fill_n over the count, or placement-new construction per element.
+    using __arithmetic = std::is_arithmetic<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    using __vec = typename decltype(__tag)::__is_vector;
+    return __pstl::__internal::__invoke_if_else(
+        __arithmetic(),
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk_brick_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                [&__value](_ForwardIterator __from, _Size __len)
+                { return __pstl::__internal::__brick_fill_n(__from, __len, _ElemType(__value), __vec{}); });
+        },
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk1_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                [&__value](_ElemRef __slot) { ::new (std::addressof(__slot)) _ElemType(__value); });
+        });
+}
+
+// [specialized.destroy]
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+destroy(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // The destructor walk is given to __invoke_if_not (presumably skipped for trivially destructible types).
+    using __trivial_dtor = std::is_trivially_destructible<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    __pstl::__internal::__invoke_if_not(
+        __trivial_dtor(),
+        [&]()
+        {
+            __pstl::__internal::__pattern_walk1(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                [](_ElemRef __obj) { __obj.~_ElemType(); });
+        });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+destroy_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // First callable only computes std::next(__first, __n); the second runs the explicit destructor calls.
+    using __trivial_dtor = std::is_trivially_destructible<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__invoke_if_else(
+        __trivial_dtor(), [&]() { return std::next(__first, __n); },
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk1_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                [](_ElemRef __obj) { __obj.~_ElemType(); });
+        });
+}
+
+// [uninitialized.construct.default]
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+uninitialized_default_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // The construction walk is given to __invoke_if_not (presumably skipped when the element type is trivial).
+    using __trivial = std::is_trivial<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    __pstl::__internal::__invoke_if_not(
+        __trivial(),
+        [&]()
+        {
+            __pstl::__internal::__pattern_walk1(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                [](_ElemRef __slot) { ::new (std::addressof(__slot)) _ElemType; });
+        });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_default_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // First callable only computes std::next(__first, __n); the second default-initializes via placement new.
+    using __trivial = std::is_trivial<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__invoke_if_else(
+        __trivial(), [&]() { return std::next(__first, __n); },
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk1_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                [](_ElemRef __slot) { ::new (std::addressof(__slot)) _ElemType; });
+        });
+}
+
+// [uninitialized.construct.value]
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, void>
+uninitialized_value_construct(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Two callables go to __invoke_if_else, keyed on std::is_trivial (presumably the first is used when true):
+    // a __brick_fill with a value-initialized _ElemType(), or placement-new value-initialization per element.
+    using __trivial = std::is_trivial<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    using __vec = typename decltype(__tag)::__is_vector;
+    __pstl::__internal::__invoke_if_else(
+        __trivial(),
+        [&]()
+        {
+            __pstl::__internal::__pattern_walk_brick(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [](_ForwardIterator __from, _ForwardIterator __to)
+                { __pstl::__internal::__brick_fill(__from, __to, _ElemType(), __vec{}); });
+        },
+        [&]()
+        {
+            __pstl::__internal::__pattern_walk1(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [](_ElemRef __slot) { ::new (std::addressof(__slot)) _ElemType(); });
+        });
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Size>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator>
+uninitialized_value_construct_n(_ExecutionPolicy&& __exec, _ForwardIterator __first, _Size __n)
+{
+    using _ElemType = typename iterator_traits<_ForwardIterator>::value_type;
+    using _ElemRef = typename iterator_traits<_ForwardIterator>::reference;
+    // Two callables go to __invoke_if_else, keyed on std::is_trivial (presumably the first is used when true):
+    // a __brick_fill_n with a value-initialized _ElemType(), or placement-new value-initialization per element.
+    using __trivial = std::is_trivial<_ElemType>;
+    auto __tag = __pstl::__internal::__select_backend(__exec, __first);
+    using __vec = typename decltype(__tag)::__is_vector;
+    return __pstl::__internal::__invoke_if_else(
+        __trivial(),
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk_brick_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                [](_ForwardIterator __from, _Size __len)
+                { return __pstl::__internal::__brick_fill_n(__from, __len, _ElemType(), __vec{}); });
+        },
+        [&]()
+        {
+            return __pstl::__internal::__pattern_walk1_n(
+                __tag, std::forward<_ExecutionPolicy>(__exec), __first, __n,
+                [](_ElemRef __slot) { ::new (std::addressof(__slot)) _ElemType(); });
+        });
+}
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_MEMORY_IMPL_H */

diff --git a/libcxx/include/pstl/internal/glue_numeric_defs.h b/libcxx/include/pstl/internal/glue_numeric_defs.h
new file mode 100644
index 0000000000000..86cd38b34b137
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_numeric_defs.h
@@ -0,0 +1,124 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_NUMERIC_DEFS_H
+#define _PSTL_GLUE_NUMERIC_DEFS_H
+
+#include <iterator>
+
+#include "execution_defs.h"
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace std
+{
+// [reduce]
+// Declarations only. reduce/transform_reduce name _Tp (or the iterator's value_type) as their result type.
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
+       _BinaryOperation __binary_op);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init);
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::value_type>
+reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _Tp __init);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1,
+          class _BinaryOperation2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
+                 _BinaryOperation2 __binary_op2);
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
+                 _BinaryOperation __binary_op, _UnaryOperation __unary_op);
+
+// [exclusive.scan]
+// Trailing parameter order: __result, __init, then the optional __binary_op.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Tp __init);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op);
+
+// [inclusive.scan]
+// In the three-overload set below, __binary_op precedes __init: the reverse of the exclusive_scan order.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _BinaryOperation __binary_op);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _BinaryOperation __binary_op, _Tp __init);
+
+// [transform.exclusive.scan]
+// Trailing parameter order: __init, then __binary_op, then __unary_op.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation,
+          class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                         _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op,
+                         _UnaryOperation __unary_op);
+
+// [transform.inclusive.scan]
+// Trailing parameter order: __binary_op, then __unary_op, then the optional __init.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation,
+          class _UnaryOperation, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                         _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op,
+                         _Tp __init);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryOperation,
+          class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                         _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op);
+
+// [adjacent.difference]
+// NOTE(review): the parameter `op` below is the only one in this header without the reserved `__` prefix.
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                    _ForwardIterator2 __d_first, _BinaryOperation op);
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                    _ForwardIterator2 __d_first);
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_NUMERIC_DEFS_H */

diff --git a/libcxx/include/pstl/internal/glue_numeric_impl.h b/libcxx/include/pstl/internal/glue_numeric_impl.h
new file mode 100644
index 0000000000000..ad268b51a16c9
--- /dev/null
+++ b/libcxx/include/pstl/internal/glue_numeric_impl.h
@@ -0,0 +1,232 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_GLUE_NUMERIC_IMPL_H
+#define _PSTL_GLUE_NUMERIC_IMPL_H
+
+#include <functional>
+
+#include "pstl_config.h"
+
+#include "utils.h"
+#include "numeric_fwd.h"
+#include "execution_impl.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace std
+{
+
+// [reduce]
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
+       _BinaryOperation __binary_op)
+{
+    return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, __binary_op,
+                            __pstl::__internal::__no_op());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init)
+{
+    return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, __init, std::plus<_Tp>(),
+                            __pstl::__internal::__no_op());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy,
+                                                 typename iterator_traits<_ForwardIterator>::value_type>
+reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last)
+{
+    typedef typename iterator_traits<_ForwardIterator>::value_type _ValueType;
+    return transform_reduce(std::forward<_ExecutionPolicy>(__exec), __first, __last, _ValueType{},
+                            std::plus<_ValueType>(), __pstl::__internal::__no_op());
+}
+
+// [transform.reduce]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _Tp __init)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+
+    typedef typename iterator_traits<_ForwardIterator1>::value_type _InputType;
+    return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                          __first1, __last1, __first2, __init, std::plus<_InputType>(),
+                                                          std::multiplies<_InputType>());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1,
+          class _BinaryOperation2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                 _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first1, __first2);
+    return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                          __first1, __last1, __first2, __init, __binary_op1,
+                                                          __binary_op2);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _Tp>
+transform_reduce(_ExecutionPolicy&& __exec, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
+                 _BinaryOperation __binary_op, _UnaryOperation __unary_op)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first);
+    return __pstl::__internal::__pattern_transform_reduce(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                          __first, __last, __init, __binary_op, __unary_op);
+}
+
+// [exclusive.scan]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Tp __init)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    using namespace __pstl;
+    return __internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __result, __pstl::__internal::__no_op(), __init, std::plus<_Tp>(),
+                                                /*inclusive=*/std::false_type());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    using namespace __pstl;
+    return __internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                                __result, __pstl::__internal::__no_op(), __init, __binary_op,
+                                                /*inclusive=*/std::false_type());
+}
+
+// [inclusive.scan]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::value_type _InputType;
+    return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result,
+                                    std::plus<_InputType>(), __pstl::__internal::__no_op());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _BinaryOperation __binary_op)
+{
+    return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __binary_op,
+                                    __pstl::__internal::__no_op());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+               _ForwardIterator2 __result, _BinaryOperation __binary_op, _Tp __init)
+{
+    return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), __first, __last, __result, __binary_op,
+                                    __pstl::__internal::__no_op(), __init);
+}
+
+// [transform.exclusive.scan]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation,
+          class _UnaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform_exclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                         _ForwardIterator2 __result, _Tp __init, _BinaryOperation __binary_op,
+                         _UnaryOperation __unary_op)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __result, __unary_op, __init, __binary_op,
+                                                        /*inclusive=*/std::false_type());
+}
+
+// [transform.inclusive.scan]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation,
+          class _UnaryOperation, class _Tp>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                         _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op,
+                         _Tp __init)
+{
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __result);
+
+    return __pstl::__internal::__pattern_transform_scan(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec), __first,
+                                                        __last, __result, __unary_op, __init, __binary_op,
+                                                        /*inclusive=*/std::true_type());
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _UnaryOperation,
+          class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+transform_inclusive_scan(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                         _ForwardIterator2 __result, _BinaryOperation __binary_op, _UnaryOperation __unary_op)
+{
+    if (__first != __last)
+    {
+        auto __tmp = __unary_op(*__first);
+        *__result = __tmp;
+        return transform_inclusive_scan(std::forward<_ExecutionPolicy>(__exec), ++__first, __last, ++__result,
+                                        __binary_op, __unary_op, __tmp);
+    }
+    else
+    {
+        return __result;
+    }
+}
+
+// [adjacent.difference]
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _BinaryOperation>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                    _ForwardIterator2 __d_first, _BinaryOperation __op)
+{
+
+    if (__first == __last)
+        return __d_first;
+
+    auto __dispatch_tag = __pstl::__internal::__select_backend(__exec, __first, __d_first);
+
+    return __pstl::__internal::__pattern_adjacent_difference(__dispatch_tag, std::forward<_ExecutionPolicy>(__exec),
+                                                             __first, __last, __d_first, __op);
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2>
+__pstl::__internal::__enable_if_execution_policy<_ExecutionPolicy, _ForwardIterator2>
+adjacent_difference(_ExecutionPolicy&& __exec, _ForwardIterator1 __first, _ForwardIterator1 __last,
+                    _ForwardIterator2 __d_first)
+{
+    typedef typename iterator_traits<_ForwardIterator1>::value_type _ValueType;
+    return adjacent_difference(std::forward<_ExecutionPolicy>(__exec), __first, __last, __d_first,
+                               std::minus<_ValueType>());
+}
+
+} // namespace std
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_GLUE_NUMERIC_IMPL_H_ */

diff --git a/libcxx/include/pstl/internal/memory_impl.h b/libcxx/include/pstl/internal/memory_impl.h
new file mode 100644
index 0000000000000..942a30ec1a65f
--- /dev/null
+++ b/libcxx/include/pstl/internal/memory_impl.h
@@ -0,0 +1,112 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_MEMORY_IMPL_H
+#define _PSTL_MEMORY_IMPL_H
+
+#include <iterator>
+
+#include "pstl_config.h"
+#include "unseq_backend_simd.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+//------------------------------------------------------------------------
+// uninitialized_move
+//------------------------------------------------------------------------
+
+template <typename _ForwardIterator, typename _OutputIterator>
+_OutputIterator
+__brick_uninitialized_move(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+                           /*vector=*/std::false_type) noexcept
+{
+    using _ValueType = typename std::iterator_traits<_OutputIterator>::value_type;
+    for (; __first != __last; ++__first, ++__result)
+    {
+        ::new (std::addressof(*__result)) _ValueType(std::move(*__first));
+    }
+    return __result;
+}
+
+template <typename _RandomAccessIterator, typename _OutputIterator>
+_OutputIterator
+__brick_uninitialized_move(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
+                           /*vector=*/std::true_type) noexcept
+{
+    using __ValueType = typename std::iterator_traits<_OutputIterator>::value_type;
+    using _ReferenceType1 = typename std::iterator_traits<_RandomAccessIterator>::reference;
+    using _ReferenceType2 = typename std::iterator_traits<_OutputIterator>::reference;
+
+    return __unseq_backend::__simd_walk_2(
+        __first, __last - __first, __result,
+        [](_ReferenceType1 __x, _ReferenceType2 __y) { ::new (std::addressof(__y)) __ValueType(std::move(__x)); });
+}
+
+template <typename _Iterator>
+void
+__brick_destroy(_Iterator __first, _Iterator __last, /*vector*/ std::false_type) noexcept
+{
+    using _ValueType = typename std::iterator_traits<_Iterator>::value_type;
+
+    for (; __first != __last; ++__first)
+        __first->~_ValueType();
+}
+
+template <typename _RandomAccessIterator>
+void
+__brick_destroy(_RandomAccessIterator __first, _RandomAccessIterator __last, /*vector*/ std::true_type) noexcept
+{
+    using _ValueType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    using _ReferenceType = typename std::iterator_traits<_RandomAccessIterator>::reference;
+
+    __unseq_backend::__simd_walk_1(__first, __last - __first, [](_ReferenceType __x) { __x.~_ValueType(); });
+}
+
+//------------------------------------------------------------------------
+// uninitialized copy
+//------------------------------------------------------------------------
+
+template <typename _ForwardIterator, typename _OutputIterator>
+_OutputIterator
+__brick_uninitialized_copy(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+                           /*vector=*/std::false_type) noexcept
+{
+    using _ValueType = typename std::iterator_traits<_OutputIterator>::value_type;
+    for (; __first != __last; ++__first, ++__result)
+    {
+        ::new (std::addressof(*__result)) _ValueType(*__first);
+    }
+    return __result;
+}
+
+template <typename _RandomAccessIterator, typename _OutputIterator>
+_OutputIterator
+__brick_uninitialized_copy(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
+                           /*vector=*/std::true_type) noexcept
+{
+    using __ValueType = typename std::iterator_traits<_OutputIterator>::value_type;
+    using _ReferenceType1 = typename std::iterator_traits<_RandomAccessIterator>::reference;
+    using _ReferenceType2 = typename std::iterator_traits<_OutputIterator>::reference;
+
+    return __unseq_backend::__simd_walk_2(
+        __first, __last - __first, __result,
+        [](_ReferenceType1 __x, _ReferenceType2 __y) { ::new (std::addressof(__y)) __ValueType(__x); });
+}
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_MEMORY_IMPL_H */

diff --git a/libcxx/include/pstl/internal/numeric_fwd.h b/libcxx/include/pstl/internal/numeric_fwd.h
new file mode 100644
index 0000000000000..7f7845bd3e260
--- /dev/null
+++ b/libcxx/include/pstl/internal/numeric_fwd.h
@@ -0,0 +1,139 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_NUMERIC_FWD_H
+#define _PSTL_NUMERIC_FWD_H
+
+#include <type_traits>
+#include <utility>
+
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+//------------------------------------------------------------------------
+// transform_reduce (version with two binary functions, according to draft N4659)
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, class _BinaryOperation1,
+          class _BinaryOperation2>
+_Tp __brick_transform_reduce(_RandomAccessIterator1, _RandomAccessIterator1, _RandomAccessIterator2, _Tp,
+                             _BinaryOperation1, _BinaryOperation2,
+                             /*__is_vector=*/std::true_type) noexcept;
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
+_Tp __brick_transform_reduce(_ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp, _BinaryOperation1,
+                             _BinaryOperation2,
+                             /*__is_vector=*/std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp,
+          class _BinaryOperation1, class _BinaryOperation2>
+_Tp
+__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1, _ForwardIterator1, _ForwardIterator2, _Tp,
+                           _BinaryOperation1, _BinaryOperation2) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Tp, class _BinaryOperation1, class _BinaryOperation2>
+_Tp
+__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator1,
+                           _RandomAccessIterator1, _RandomAccessIterator2, _Tp, _BinaryOperation1, _BinaryOperation2);
+
+//------------------------------------------------------------------------
+// transform_reduce (version with unary and binary functions)
+//------------------------------------------------------------------------
+
+template <class _RandomAccessIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>
+_Tp __brick_transform_reduce(_RandomAccessIterator, _RandomAccessIterator, _Tp, _BinaryOperation, _UnaryOperation,
+                             /*is_vector=*/std::true_type) noexcept;
+
+template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
+_Tp __brick_transform_reduce(_ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation, _UnaryOperation,
+                             /*is_vector=*/std::false_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation,
+          class _UnaryOperation>
+_Tp
+__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _Tp, _BinaryOperation,
+                           _UnaryOperation) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Tp, class _BinaryOperation,
+          class _UnaryOperation>
+_Tp
+__pattern_transform_reduce(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                           _Tp, _BinaryOperation, _UnaryOperation);
+
+//------------------------------------------------------------------------
+// transform_exclusive_scan
+//
+// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
+std::pair<_OutputIterator, _Tp> __brick_transform_scan(_ForwardIterator, _ForwardIterator, _OutputIterator,
+                                                       _UnaryOperation, _Tp, _BinaryOperation,
+                                                       /*Inclusive*/ std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
+std::pair<_OutputIterator, _Tp> __brick_transform_scan(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator,
+                                                       _UnaryOperation, _Tp, _BinaryOperation,
+                                                       /*Inclusive*/ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation,
+          class _Tp, class _BinaryOperation, class _Inclusive>
+_OutputIterator
+__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator, _UnaryOperation,
+                         _Tp, _BinaryOperation, _Inclusive) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive>
+typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type
+__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&&, _RandomAccessIterator,
+                         _RandomAccessIterator, _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive);
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive>
+typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type
+__pattern_transform_scan(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator, _RandomAccessIterator,
+                         _OutputIterator, _UnaryOperation, _Tp, _BinaryOperation, _Inclusive);
+
+//------------------------------------------------------------------------
+// adjacent_difference
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
+_OutputIterator __brick_adjacent_difference(_ForwardIterator, _ForwardIterator, _OutputIterator, _BinaryOperation,
+                                            /*is_vector*/ std::false_type) noexcept;
+
+template <class _RandomAccessIterator, class _OutputIterator, class _BinaryOperation>
+_OutputIterator __brick_adjacent_difference(_RandomAccessIterator, _RandomAccessIterator, _OutputIterator,
+                                            _BinaryOperation,
+                                            /*is_vector*/ std::true_type) noexcept;
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
+_OutputIterator
+__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator, _ForwardIterator, _OutputIterator,
+                              _BinaryOperation) noexcept;
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _BinaryOperation>
+_OutputIterator
+__pattern_adjacent_difference(__parallel_tag<_IsVector>, _ExecutionPolicy&&, _RandomAccessIterator,
+                              _RandomAccessIterator, _OutputIterator, _BinaryOperation);
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_NUMERIC_FWD_H */

diff --git a/libcxx/include/pstl/internal/numeric_impl.h b/libcxx/include/pstl/internal/numeric_impl.h
new file mode 100644
index 0000000000000..a0387aef50b00
--- /dev/null
+++ b/libcxx/include/pstl/internal/numeric_impl.h
@@ -0,0 +1,383 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_NUMERIC_IMPL_H
+#define _PSTL_NUMERIC_IMPL_H
+
+#include <iterator>
+#include <type_traits>
+#include <numeric>
+
+#include "parallel_backend.h"
+#include "pstl_config.h"
+#include "execution_impl.h"
+#include "unseq_backend_simd.h"
+#include "algorithm_fwd.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+//------------------------------------------------------------------------
+// transform_reduce (version with two binary functions, according to draft N4659)
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _Tp, class _BinaryOperation1, class _BinaryOperation2>
+_Tp
+__brick_transform_reduce(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _Tp __init,
+                         _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2,
+                         /*is_vector=*/std::false_type) noexcept
+{
+    return std::inner_product(__first1, __last1, __first2, __init, __binary_op1, __binary_op2);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _Tp, class _BinaryOperation1,
+          class _BinaryOperation2>
+_Tp
+__brick_transform_reduce(_RandomAccessIterator1 __first1, _RandomAccessIterator1 __last1,
+                         _RandomAccessIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
+                         _BinaryOperation2 __binary_op2,
+                         /*is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType;
+    return __unseq_backend::__simd_transform_reduce(
+        __last1 - __first1, __init, __binary_op1,
+        [=, &__binary_op2](_DifferenceType __i) { return __binary_op2(__first1[__i], __first2[__i]); });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator1, class _ForwardIterator2, class _Tp,
+          class _BinaryOperation1, class _BinaryOperation2>
+_Tp
+__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator1 __first1, _ForwardIterator1 __last1,
+                           _ForwardIterator2 __first2, _Tp __init, _BinaryOperation1 __binary_op1,
+                           _BinaryOperation2 __binary_op2) noexcept
+{
+    return __brick_transform_reduce(__first1, __last1, __first2, __init, __binary_op1, __binary_op2,
+                                    typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _Tp, class _BinaryOperation1, class _BinaryOperation2>
+_Tp
+__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator1 __first1,
+                           _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _Tp __init,
+                           _BinaryOperation1 __binary_op1, _BinaryOperation2 __binary_op2)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __par_backend::__parallel_transform_reduce(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first1, __last1,
+                [__first1, __first2, __binary_op2](_RandomAccessIterator1 __i) mutable
+                { return __binary_op2(*__i, *(__first2 + (__i - __first1))); },
+                __init,
+                __binary_op1, // Combine
+                [__first1, __first2, __binary_op1, __binary_op2](_RandomAccessIterator1 __i, _RandomAccessIterator1 __j,
+                                                                 _Tp __init) -> _Tp
+                {
+                    return __internal::__brick_transform_reduce(__i, __j, __first2 + (__i - __first1), __init,
+                                                                __binary_op1, __binary_op2, _IsVector{});
+                });
+        });
+}
+
+//------------------------------------------------------------------------
+// transform_reduce (version with unary and binary functions)
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _Tp, class _BinaryOperation, class _UnaryOperation>
+_Tp
+__brick_transform_reduce(_ForwardIterator __first, _ForwardIterator __last, _Tp __init, _BinaryOperation __binary_op,
+                         _UnaryOperation __unary_op, /*is_vector=*/std::false_type) noexcept
+{
+    return std::transform_reduce(__first, __last, __init, __binary_op, __unary_op);
+}
+
+template <class _RandomAccessIterator, class _Tp, class _UnaryOperation, class _BinaryOperation>
+_Tp
+__brick_transform_reduce(_RandomAccessIterator __first, _RandomAccessIterator __last, _Tp __init,
+                         _BinaryOperation __binary_op, _UnaryOperation __unary_op,
+                         /*is_vector=*/std::true_type) noexcept
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
+    return __unseq_backend::__simd_transform_reduce(
+        __last - __first, __init, __binary_op,
+        [=, &__unary_op](_DifferenceType __i) { return __unary_op(__first[__i]); });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _Tp, class _BinaryOperation,
+          class _UnaryOperation>
+_Tp
+__pattern_transform_reduce(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Tp __init,
+                           _BinaryOperation __binary_op, _UnaryOperation __unary_op) noexcept
+{
+    return __internal::__brick_transform_reduce(__first, __last, __init, __binary_op, __unary_op,
+                                                typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _Tp, class _BinaryOperation,
+          class _UnaryOperation>
+_Tp
+__pattern_transform_reduce(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                           _RandomAccessIterator __last, _Tp __init, _BinaryOperation __binary_op,
+                           _UnaryOperation __unary_op)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            return __par_backend::__parallel_transform_reduce(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                [__unary_op](_RandomAccessIterator __i) mutable { return __unary_op(*__i); }, __init, __binary_op,
+                [__unary_op, __binary_op](_RandomAccessIterator __i, _RandomAccessIterator __j, _Tp __init) {
+                    return __internal::__brick_transform_reduce(__i, __j, __init, __binary_op, __unary_op, _IsVector{});
+                });
+        });
+}
+
+//------------------------------------------------------------------------
+// transform_exclusive_scan
+//
+// walk3 evaluates f(x,y,z) for (x,y,z) drawn from [first1,last1), [first2,...), [first3,...)
+//------------------------------------------------------------------------
+
+// Exclusive form
+template <class _ForwardIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
+std::pair<_OutputIterator, _Tp>
+__brick_transform_scan(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __result,
+                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
+                       /*Inclusive*/ std::false_type, /*is_vector=*/std::false_type) noexcept
+{
+    for (; __first != __last; ++__first, ++__result)
+    {
+        *__result = __init;
+        _PSTL_PRAGMA_FORCEINLINE
+        __init = __binary_op(__init, __unary_op(*__first));
+    }
+    return std::make_pair(__result, __init);
+}
+
+// Inclusive form
+template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation>
+std::pair<_OutputIterator, _Tp>
+__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
+                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
+                       /*Inclusive*/ std::true_type, /*is_vector=*/std::false_type) noexcept
+{
+    for (; __first != __last; ++__first, ++__result)
+    {
+        _PSTL_PRAGMA_FORCEINLINE
+        __init = __binary_op(__init, __unary_op(*__first));
+        *__result = __init;
+    }
+    return std::make_pair(__result, __init);
+}
+
+// type is arithmetic and binary operation is a user defined operation.
+template <typename _Tp, typename _BinaryOperation>
+using is_arithmetic_udop = std::integral_constant<bool, std::is_arithmetic<_Tp>::value &&
+                                                            !std::is_same<_BinaryOperation, std::plus<_Tp>>::value>;
+
+// [restriction] - T shall be DefaultConstructible.
+// [violation] - default ctor of T shall set the identity value for binary_op.
+template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
+          class _Inclusive>
+typename std::enable_if<!is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
+__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
+                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
+                       /*is_vector=*/std::true_type) noexcept
+{
+#if defined(_PSTL_UDS_PRESENT)
+    return __unseq_backend::__simd_scan(__first, __last - __first, __result, __unary_op, __init, __binary_op,
+                                        _Inclusive());
+#else
+    // We need to call serial brick here to call function for inclusive and exclusive scan that depends on _Inclusive() value
+    return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
+                                              /*is_vector=*/std::false_type());
+#endif
+}
+
+template <class _RandomAccessIterator, class _OutputIterator, class _UnaryOperation, class _Tp, class _BinaryOperation,
+          class _Inclusive>
+typename std::enable_if<is_arithmetic_udop<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
+__brick_transform_scan(_RandomAccessIterator __first, _RandomAccessIterator __last, _OutputIterator __result,
+                       _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op, _Inclusive,
+                       /*is_vector=*/std::true_type) noexcept
+{
+    return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
+                                              /*is_vector=*/std::false_type());
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _UnaryOperation,
+          class _Tp, class _BinaryOperation, class _Inclusive>
+_OutputIterator
+__pattern_transform_scan(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                         _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init, _BinaryOperation __binary_op,
+                         _Inclusive) noexcept
+{
+    return __internal::__brick_transform_scan(__first, __last, __result, __unary_op, __init, __binary_op, _Inclusive(),
+                                              typename _Tag::__is_vector{})
+        .first;
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive>
+typename std::enable_if<!std::is_floating_point<_Tp>::value, _OutputIterator>::type
+__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                         _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
+                         _BinaryOperation __binary_op, _Inclusive)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
+
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_transform_scan(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __last - __first,
+                [__first, __unary_op](_DifferenceType __i) mutable { return __unary_op(__first[__i]); }, __init,
+                __binary_op,
+                [__first, __unary_op, __binary_op](_DifferenceType __i, _DifferenceType __j, _Tp __init)
+                {
+                    // Execute serial __brick_transform_reduce, due to the explicit SIMD vectorization (reduction) requires a commutative operation for the guarantee of correct scan.
+                    return __internal::__brick_transform_reduce(__first + __i, __first + __j, __init, __binary_op,
+                                                                __unary_op,
+                                                                /*__is_vector*/ std::false_type());
+                },
+                [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __j, _Tp __init)
+                {
+                    return __internal::__brick_transform_scan(__first + __i, __first + __j, __result + __i, __unary_op,
+                                                              __init, __binary_op, _Inclusive(), _IsVector{})
+                        .second;
+                });
+            return __result + (__last - __first);
+        });
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator, class _OutputIterator,
+          class _UnaryOperation, class _Tp, class _BinaryOperation, class _Inclusive>
+typename std::enable_if<std::is_floating_point<_Tp>::value, _OutputIterator>::type
+__pattern_transform_scan(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec, _RandomAccessIterator __first,
+                         _RandomAccessIterator __last, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
+                         _BinaryOperation __binary_op, _Inclusive)
+{
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
+    _DifferenceType __n = __last - __first;
+
+    if (__n <= 0)
+    {
+        return __result;
+    }
+    return __internal::__except_handler(
+        [&]()
+        {
+            __par_backend::__parallel_strict_scan(
+                __backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __n, __init,
+                [__first, __unary_op, __binary_op, __result](_DifferenceType __i, _DifferenceType __len)
+                {
+                    return __internal::__brick_transform_scan(__first + __i, __first + (__i + __len), __result + __i,
+                                                              __unary_op, _Tp{}, __binary_op, _Inclusive(), _IsVector{})
+                        .second;
+                },
+                __binary_op,
+                [__result, &__binary_op](_DifferenceType __i, _DifferenceType __len, _Tp __initial)
+                {
+                    return *(std::transform(__result + __i, __result + __i + __len, __result + __i,
+                                            [&__initial, &__binary_op](const _Tp& __x)
+                                            {
+                                                _PSTL_PRAGMA_FORCEINLINE
+                                                return __binary_op(__initial, __x);
+                                            }) -
+                             1);
+                },
+                [](_Tp) {});
+            return __result + (__last - __first);
+        });
+}
+
+//------------------------------------------------------------------------
+// adjacent_difference
+//------------------------------------------------------------------------
+
+template <class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
+_OutputIterator
+__brick_adjacent_difference(_ForwardIterator __first, _ForwardIterator __last, _OutputIterator __d_first,
+                            _BinaryOperation __op, /*is_vector*/ std::false_type) noexcept
+{
+    return std::adjacent_difference(__first, __last, __d_first, __op);
+}
+
+template <class _RandomAccessIterator1, class _RandomAccessIterator2, class BinaryOperation>
+_RandomAccessIterator2
+__brick_adjacent_difference(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last,
+                            _RandomAccessIterator2 __d_first, BinaryOperation __op,
+                            /*is_vector=*/std::true_type) noexcept
+{
+    _PSTL_ASSERT(__first != __last);
+
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2;
+
+    auto __n = __last - __first;
+    *__d_first = *__first;
+    return __unseq_backend::__simd_walk_3(
+        __first + 1, __n - 1, __first, __d_first + 1,
+        [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z) { __z = __op(__x, __y); });
+}
+
+template <class _Tag, class _ExecutionPolicy, class _ForwardIterator, class _OutputIterator, class _BinaryOperation>
+_OutputIterator
+__pattern_adjacent_difference(_Tag, _ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last,
+                              _OutputIterator __d_first, _BinaryOperation __op) noexcept
+{
+    return __internal::__brick_adjacent_difference(__first, __last, __d_first, __op, typename _Tag::__is_vector{});
+}
+
+template <class _IsVector, class _ExecutionPolicy, class _RandomAccessIterator1, class _RandomAccessIterator2,
+          class _BinaryOperation>
+_RandomAccessIterator2
+__pattern_adjacent_difference(__parallel_tag<_IsVector> __tag, _ExecutionPolicy&& __exec,
+                              _RandomAccessIterator1 __first, _RandomAccessIterator1 __last,
+                              _RandomAccessIterator2 __d_first, _BinaryOperation __op)
+{
+    _PSTL_ASSERT(__first != __last);
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::reference _ReferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::reference _ReferenceType2;
+
+    using __backend_tag = typename decltype(__tag)::__backend_tag;
+
+    *__d_first = *__first;
+    __par_backend::__parallel_for(__backend_tag{}, std::forward<_ExecutionPolicy>(__exec), __first, __last - 1,
+                                  [&__op, __d_first, __first](_RandomAccessIterator1 __b, _RandomAccessIterator1 __e)
+                                  {
+                                      _RandomAccessIterator2 __d_b = __d_first + (__b - __first);
+                                      __internal::__brick_walk3(
+                                          __b, __e, __b + 1, __d_b + 1,
+                                          [&__op](_ReferenceType1 __x, _ReferenceType1 __y, _ReferenceType2 __z)
+                                          { __z = __op(__y, __x); },
+                                          _IsVector{});
+                                  });
+    return __d_first + (__last - __first);
+}
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_NUMERIC_IMPL_H */

diff --git a/libcxx/include/pstl/internal/omp/parallel_for.h b/libcxx/include/pstl/internal/omp/parallel_for.h
new file mode 100644
index 0000000000000..0f841e5311810
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_for.h
@@ -0,0 +1,64 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
+
+#include <cstddef>
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+template <class _Index, class _Fp>
+void
+__parallel_for_body(_Index __first, _Index __last, _Fp __f)
+{
+    // initial partition of the iteration space into chunks
+    auto __policy = __omp_backend::__chunk_partitioner(__first, __last);
+
+    // To avoid over-subscription we use taskloop for the nested parallelism
+    _PSTL_PRAGMA(omp taskloop untied mergeable)
+    for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
+    {
+        __pstl::__omp_backend::__process_chunk(__policy, __first, __chunk, __f);
+    }
+}
+
+//------------------------------------------------------------------------
+// Notation:
+// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last)
+//------------------------------------------------------------------------
+
+template <class _ExecutionPolicy, class _Index, class _Fp>
+void
+__parallel_for(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
+{
+    if (omp_in_parallel())
+    {
+        // we don't create a nested parallel region in an existing parallel
+        // region: just create tasks
+        __pstl::__omp_backend::__parallel_for_body(__first, __last, __f);
+    }
+    else
+    {
+        // in any case (nested or non-nested) one parallel region is created and
+        // only one thread creates a set of tasks
+        _PSTL_PRAGMA(omp parallel)
+        _PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); }
+    }
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_H

diff --git a/libcxx/include/pstl/internal/omp/parallel_for_each.h b/libcxx/include/pstl/internal/omp/parallel_for_each.h
new file mode 100644
index 0000000000000..b9bfb05930a19
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_for_each.h
@@ -0,0 +1,59 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+template <class _ForwardIterator, class _Fp>
+void
+__parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp __f)
+{
+    using DifferenceType = typename std::iterator_traits<_ForwardIterator>::difference_type;
+    // TODO: Think of an approach to remove the std::distance call
+    auto __size = std::distance(__first, __last);
+
+    _PSTL_PRAGMA(omp taskloop untied mergeable)
+    for (DifferenceType __index = 0; __index < __size; ++__index)
+    {
+        // TODO: Think of an approach to remove the increment here each time.
+        auto __iter = std::next(__first, __index);
+        __f(*__iter);
+    }
+}
+
+template <class _ExecutionPolicy, class _ForwardIterator, class _Fp>
+void
+__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f)
+{
+    if (omp_in_parallel())
+    {
+        // we don't create a nested parallel region in an existing parallel
+        // region: just create tasks
+        __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f);
+    }
+    else
+    {
+        // in any case (nested or non-nested) one parallel region is created and
+        // only one thread creates a set of tasks
+        _PSTL_PRAGMA(omp parallel)
+        _PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); }
+    }
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H

diff --git a/libcxx/include/pstl/internal/omp/parallel_invoke.h b/libcxx/include/pstl/internal/omp/parallel_invoke.h
new file mode 100644
index 0000000000000..045ccbe9a18cb
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_invoke.h
@@ -0,0 +1,50 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+template <typename _F1, typename _F2>
+void
+__parallel_invoke_body(_F1&& __f1, _F2&& __f2)
+{
+    _PSTL_PRAGMA(omp taskgroup)
+    {
+        _PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F1>(__f1)(); }
+        _PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F2>(__f2)(); }
+    }
+}
+
+template <class _ExecutionPolicy, typename _F1, typename _F2>
+void
+__parallel_invoke(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2)
+{
+    if (omp_in_parallel())
+    {
+        __pstl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
+    }
+    else
+    {
+        _PSTL_PRAGMA(omp parallel)
+        _PSTL_PRAGMA(omp single nowait)
+        __pstl::__omp_backend::__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
+    }
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H

diff --git a/libcxx/include/pstl/internal/omp/parallel_merge.h b/libcxx/include/pstl/internal/omp/parallel_merge.h
new file mode 100644
index 0000000000000..e6f82c5e1866c
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_merge.h
@@ -0,0 +1,98 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
+          typename _Compare, typename _LeafMerge>
+void
+__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs,
+                      _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye,
+                      _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge)
+{
+
+    if (__size_x + __size_y <= __omp_backend::__default_chunk_size)
+    {
+        __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp);
+        return;
+    }
+
+    _RandomAccessIterator1 __xm;
+    _RandomAccessIterator2 __ym;
+
+    if (__size_x < __size_y)
+    {
+        __ym = __ys + (__size_y / 2);
+        __xm = std::upper_bound(__xs, __xe, *__ym, __comp);
+    }
+    else
+    {
+        __xm = __xs + (__size_x / 2);
+        __ym = std::lower_bound(__ys, __ye, *__xm, __comp);
+    }
+
+    auto __zm = __zs + (__xm - __xs) + (__ym - __ys);
+
+    _PSTL_PRAGMA(omp task untied mergeable default(none)
+                     firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge))
+    __pstl::__omp_backend::__parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp,
+                                                      __leaf_merge);
+
+    _PSTL_PRAGMA(omp task untied mergeable default(none)
+                     firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge))
+    __pstl::__omp_backend::__parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp,
+                                                      __leaf_merge);
+
+    _PSTL_PRAGMA(omp taskwait)
+}
+
+template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2,
+          typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
+void
+__parallel_merge(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs,
+                 _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye,
+                 _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge)
+
+{
+    std::size_t __size_x = __xe - __xs;
+    std::size_t __size_y = __ye - __ys;
+
+    /*
+     * Run the merge in parallel by chunking it up. Use the smaller range (if any) as the iteration range, and the
+     * larger range as the search range.
+     */
+
+    if (omp_in_parallel())
+    {
+        __pstl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp,
+                                                          __leaf_merge);
+    }
+    else
+    {
+        _PSTL_PRAGMA(omp parallel)
+        {
+            _PSTL_PRAGMA(omp single nowait)
+            __pstl::__omp_backend::__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp,
+                                                              __leaf_merge);
+        }
+    }
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H

diff --git a/libcxx/include/pstl/internal/omp/parallel_reduce.h b/libcxx/include/pstl/internal/omp/parallel_reduce.h
new file mode 100644
index 0000000000000..841d48fc30515
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_reduce.h
@@ -0,0 +1,73 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+// Divide-and-conquer reduction of [__first, __last).
+//
+// A range for which __should_run_serial() returns true is handed to
+// __real_body in a single call. Any other range is cut at its midpoint; both
+// halves are reduced recursively through __parallel_invoke_body, each starting
+// from __identity, and the two partial results are merged with __reduce.
+template <class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction>
+_Value
+__parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity,
+                       _RealBody __real_body, _Reduction __reduce)
+{
+    if (!__should_run_serial(__first, __last))
+    {
+        const auto __half = (__last - __first) / 2;
+        auto __mid = __first + __half;
+
+        // One result slot per half, both seeded with the identity value.
+        _Value __lhs(__identity);
+        _Value __rhs(__identity);
+        __parallel_invoke_body(
+            [&]() { __lhs = __parallel_reduce_body(__first, __mid, __identity, __real_body, __reduce); },
+            [&]() { __rhs = __parallel_reduce_body(__mid, __last, __identity, __real_body, __reduce); });
+
+        return __reduce(__lhs, __rhs);
+    }
+
+    return __real_body(__first, __last, __identity);
+}
+
+//------------------------------------------------------------------------
+// Notation:
+//      r(i,j,init) returns reduction of init with reduction over [i,j)
+//      c(x,y) combines values x and y that were the result of r
+//------------------------------------------------------------------------
+
+// OpenMP backend entry point for a reduction over [__first, __last).
+//
+// Delegates to __parallel_reduce_body with __identity, __real_body and
+// __reduction and returns its result. The execution policy argument is
+// accepted but not used.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction>
+_Value
+__parallel_reduce(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first,
+                  _RandomAccessIterator __last, _Value __identity, _RealBody __real_body, _Reduction __reduction)
+{
+    // We don't create a nested parallel region in an existing parallel region:
+    // just create tasks.
+    if (omp_in_parallel())
+    {
+        return __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction);
+    }
+
+    // Not inside a parallel region yet: one parallel region is created here and
+    // only one thread of it runs the reduction body.
+    _Value __res = __identity;
+
+    _PSTL_PRAGMA(omp parallel)
+    _PSTL_PRAGMA(omp single nowait)
+    {
+        // __res is declared outside the parallel region and assigned by the single thread.
+        __res = __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction);
+    }
+
+    return __res;
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H

diff  --git a/libcxx/include/pstl/internal/omp/parallel_scan.h b/libcxx/include/pstl/internal/omp/parallel_scan.h
new file mode 100644
index 0000000000000..f3eb967ca4fb2
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_scan.h
@@ -0,0 +1,136 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H
+
+#include "parallel_invoke.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+// Returns the largest power of two that is strictly less than __m,
+// or 1 when __m is 2 or smaller.
+template <typename _Index>
+_Index
+__split(_Index __m)
+{
+    _Index __pow2 = 1;
+    // Keep doubling for as long as the doubled value would still be below __m.
+    for (; 2 * __pow2 < __m; __pow2 *= 2)
+    {
+    }
+    return __pow2;
+}
+
+// Up-sweep phase of the tiled scan over __m consecutive tiles starting at
+// tile index __i.
+//
+//   __tilesize  size passed down for every sub-range except the one that
+//               contains the last tile
+//   __r         result array; __r[j] is written for tile __i + j
+//   __lastsize  size handed to __reduce for the last tile of this range
+//   __reduce    called as __reduce(__i * __tilesize, size) for a single tile
+//   __combine   binary functor merging two partial results
+template <typename _Index, typename _Tp, typename _Rp, typename _Cp>
+void
+__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine)
+{
+    // Single tile: store its reduction.
+    if (__m == 1)
+        __r[0] = __reduce(__i * __tilesize, __lastsize);
+    else
+    {
+        // Split into a left part of __k tiles and a right part of __m - __k tiles.
+        _Index __k = __split(__m);
+        __omp_backend::__parallel_invoke_body(
+            // Left part: its last tile is a regular one, so __tilesize is passed as the last size.
+            [=] { __omp_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); },
+            [=] {
+                __omp_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine);
+            });
+        // Only when both parts have the same number of tiles is the last slot
+        // replaced by the combination of the two parts' last slots.
+        if (__m == 2 * __k)
+            __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]);
+    }
+}
+
+// Down-sweep phase of the tiled scan over __m consecutive tiles starting at
+// tile index __i.
+//
+//   __r        array of partial results written by __upsweep
+//   __initial  value passed to __scan as the starting value for this range
+//   __combine  binary functor used to derive the starting value of the right part
+//   __scan     called as __scan(__i * __tilesize, size, __initial) for a single tile
+template <typename _Index, typename _Tp, typename _Cp, typename _Sp>
+void
+__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine,
+            _Sp __scan)
+{
+    // Single tile: run the scan functor on it.
+    if (__m == 1)
+        __scan(__i * __tilesize, __lastsize, __initial);
+    else
+    {
+        const _Index __k = __split(__m);
+        __omp_backend::__parallel_invoke_body(
+            // Left part keeps the incoming __initial.
+            [=] { __omp_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); },
+            // Assumes that __combine never throws.
+            // TODO: Consider adding a requirement for user functors to be constant.
+            // __combine is captured by reference here, everything else by value.
+            [=, &__combine]
+            {
+                // Right part starts from __initial combined with __r[__k - 1].
+                __omp_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize,
+                                           __combine(__initial, __r[__k - 1]), __combine, __scan);
+            });
+    }
+}
+
+// Tiled scan over __n elements: up-sweep, apex call, then down-sweep.
+//
+// The index space is cut into __m + 1 tiles of __tilesize elements (the last
+// tile gets __n - __m * __tilesize). __upsweep fills a temporary array with
+// per-tile results, __apex is called once with the combined total, and
+// __downsweep then runs __scan on every tile.
+// The _ExecutionPolicy template parameter is not used in the body.
+template <typename _ExecutionPolicy, typename _Index, typename _Tp, typename _Rp, typename _Cp, typename _Sp,
+          typename _Ap>
+void
+__parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex)
+{
+    _Index __p = omp_get_num_threads();
+    // Aim for roughly __slack tiles per thread.
+    const _Index __slack = 4;
+    _Index __tilesize = (__n - 1) / (__slack * __p) + 1;
+    _Index __m = (__n - 1) / __tilesize;
+    // NOTE(review): __buffer only allocates storage; no construction of the _Tp
+    // elements is visible here before __upsweep assigns into them -- confirm
+    // this is acceptable for the _Tp types in use.
+    __buffer<_Tp> __buf(__m + 1);
+    _Tp* __r = __buf.get();
+
+    __omp_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine);
+
+    // Fold the partial results into a total, starting from the last slot.
+    // `__k &= __k - 1` clears the lowest set bit of __k on every iteration.
+    std::size_t __k = __m + 1;
+    _Tp __t = __r[__k - 1];
+    while ((__k &= __k - 1))
+    {
+        __t = __combine(__r[__k - 1], __t);
+    }
+
+    __apex(__combine(__initial, __t));
+    __omp_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial,
+                               __combine, __scan);
+}
+
+// OpenMP backend entry point for a strict scan over __n elements.
+//
+// Inputs of at most __default_chunk_size elements are handled serially in
+// this function. Larger inputs go to __parallel_strict_scan_body, either
+// directly (already inside a parallel region) or from an `omp single nowait`
+// block inside a newly opened parallel region.
+// The execution policy argument is accepted but not used.
+template <class _ExecutionPolicy, typename _Index, typename _Tp, typename _Rp, typename _Cp, typename _Sp, typename _Ap>
+void
+__parallel_strict_scan(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial,
+                       _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex)
+{
+    // Serial path for small inputs.
+    if (__n <= __default_chunk_size)
+    {
+        _Tp __sum = __initial;
+        if (__n)
+        {
+            __sum = __combine(__sum, __reduce(_Index(0), __n));
+        }
+        // __apex is called even for an empty input; __reduce and __scan are not.
+        __apex(__sum);
+        if (__n)
+        {
+            __scan(_Index(0), __n, __initial);
+        }
+        return;
+    }
+
+    if (omp_in_parallel())
+    {
+        __pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine,
+                                                                             __scan, __apex);
+    }
+    else
+    {
+        // Open a parallel region; only one thread of it runs the scan body.
+        _PSTL_PRAGMA(omp parallel)
+        _PSTL_PRAGMA(omp single nowait)
+        {
+            __pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine,
+                                                                                 __scan, __apex);
+        }
+    }
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H

diff  --git a/libcxx/include/pstl/internal/omp/parallel_stable_partial_sort.h b/libcxx/include/pstl/internal/omp/parallel_stable_partial_sort.h
new file mode 100644
index 0000000000000..06cd55bb4899b
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_stable_partial_sort.h
@@ -0,0 +1,33 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+// Placeholder for a parallel stable partial sort in the OpenMP backend.
+//
+// Currently no parallelism is applied: the whole range [__xs, __xe) is handed
+// to __leaf_sort together with __comp. The last (unnamed) size parameter is
+// ignored.
+template <typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
+void
+__parallel_stable_partial_sort(__pstl::__internal::__openmp_backend_tag, _RandomAccessIterator __xs,
+                               _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort,
+                               std::size_t /* __nsort */)
+{
+    // TODO: parallel partial sort needs to be implemented.
+    __leaf_sort(__xs, __xe, __comp);
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H

diff  --git a/libcxx/include/pstl/internal/omp/parallel_stable_sort.h b/libcxx/include/pstl/internal/omp/parallel_stable_sort.h
new file mode 100644
index 0000000000000..e4d0676627260
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_stable_sort.h
@@ -0,0 +1,160 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H
+
+#include "util.h"
+#include "parallel_merge.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+// Helpers used by __parallel_stable_sort_body to move elements between the
+// source range and a temporary buffer.
+namespace __sort_details
+{
+// Functor that move-assigns the element referenced by __x into *__z.
+struct __move_value
+{
+    template <typename _Iterator, typename _OutputIterator>
+    void
+    operator()(_Iterator __x, _OutputIterator __z) const
+    {
+        *__z = std::move(*__x);
+    }
+};
+
+// Moves [__first1, __last1) to the range starting at __d_first and returns the
+// iterator one past the last destination element.
+//
+// Ranges of at most __default_chunk_size elements are moved with a single
+// std::move call. Larger ranges are partitioned with __chunk_partitioner and
+// each chunk is moved inside an `omp taskloop` iteration.
+template <typename _RandomAccessIterator, typename _OutputIterator>
+_OutputIterator
+__parallel_move_range(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first)
+{
+    std::size_t __size = __last1 - __first1;
+
+    // Perform serial moving of small chunks
+
+    if (__size <= __default_chunk_size)
+    {
+        return std::move(__first1, __last1, __d_first);
+    }
+
+    // Perform parallel moving of larger chunks
+    auto __policy = __pstl::__omp_backend::__chunk_partitioner(__first1, __last1);
+
+    _PSTL_PRAGMA(omp taskloop)
+    for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
+    {
+        __pstl::__omp_backend::__process_chunk(__policy, __first1, __chunk,
+                                       [&](auto __chunk_first, auto __chunk_last)
+                                       {
+                                           // The destination of a chunk sits at the same offset
+                                           // from __d_first as the chunk does from __first1.
+                                           auto __chunk_offset = __chunk_first - __first1;
+                                           auto __output_it = __d_first + __chunk_offset;
+                                           std::move(__chunk_first, __chunk_last, __output_it);
+                                       });
+    }
+
+    return __d_first + __size;
+}
+
+// Functor wrapper around __parallel_move_range so it can be passed as an argument.
+struct __move_range
+{
+    template <typename _RandomAccessIterator, typename _OutputIterator>
+    _OutputIterator
+    operator()(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first) const
+    {
+        return __pstl::__omp_backend::__sort_details::__parallel_move_range(__first1, __last1, __d_first);
+    }
+};
+} // namespace __sort_details
+
+// Recursive merge sort of [__xs, __xe).
+//
+// A range for which __should_run_serial() returns true is sorted with
+// __leaf_sort. Any other range is split at its midpoint, both halves are
+// sorted recursively through __parallel_invoke_body, the halves are merged
+// into a temporary std::vector with __parallel_merge_body, and the merged
+// values are moved back into [__xs, __xe).
+template <typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
+void
+__parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp,
+                            _LeafSort __leaf_sort)
+{
+    using _ValueType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
+    using _VecType = typename std::vector<_ValueType>;
+    using _OutputIterator = typename _VecType::iterator;
+    using _MoveValue = typename __omp_backend::__sort_details::__move_value;
+    using _MoveRange = __omp_backend::__sort_details::__move_range;
+
+    if (__should_run_serial(__xs, __xe))
+    {
+        __leaf_sort(__xs, __xe, __comp);
+    }
+    else
+    {
+        std::size_t __size = __xe - __xs;
+        auto __mid = __xs + (__size / 2);
+        __pstl::__omp_backend::__parallel_invoke_body(
+            [&]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); },
+            [&]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); });
+
+        // Perform a parallel merge of the sorted ranges into __output_data.
+        // The vector is created with __size elements, so _ValueType has to be
+        // default-constructible for this to compile.
+        _VecType __output_data(__size);
+        _MoveValue __move_value;
+        _MoveRange __move_range;
+        // __serial_move_merge is declared elsewhere; it is invoked below with the
+        // two input ranges, the output iterator, the comparator and the move helpers.
+        __utils::__serial_move_merge __merge(__size);
+        __pstl::__omp_backend::__parallel_merge_body(
+            __mid - __xs, __xe - __mid, __xs, __mid, __mid, __xe, __output_data.begin(), __comp,
+            [&__merge, &__move_value, &__move_range](_RandomAccessIterator __as, _RandomAccessIterator __ae,
+                                                     _RandomAccessIterator __bs, _RandomAccessIterator __be,
+                                                     _OutputIterator __cs, _Compare __comp)
+            { __merge(__as, __ae, __bs, __be, __cs, __comp, __move_value, __move_value, __move_range, __move_range); });
+
+        // Move the values from __output_data back in the original source range.
+        __pstl::__omp_backend::__sort_details::__parallel_move_range(__output_data.begin(), __output_data.end(), __xs);
+    }
+}
+
+// OpenMP backend entry point for stable sort / partial sort of [__xs, __xe).
+//
+//   __leaf_sort  serial sort functor, called as __leaf_sort(first, last, __comp)
+//   __nsort      number of leading elements that must end up sorted
+//                (defaults to 0)
+//
+// The range is sorted serially with __leaf_sort when it holds at most
+// __default_chunk_size elements or when __nsort is smaller than the range
+// size. Otherwise __parallel_stable_sort_body is run, directly if already in
+// a parallel region or from an `omp single nowait` block in a new one.
+// The execution policy argument is accepted but not used.
+template <class _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
+void
+__parallel_stable_sort(__pstl::__internal::__openmp_backend_tag __tag, _ExecutionPolicy&& /*__exec*/,
+                       _RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort,
+                       std::size_t __nsort = 0)
+{
+    auto __count = static_cast<std::size_t>(__xe - __xs);
+    // With the default __nsort of 0 every non-empty range takes this serial path.
+    if (__count <= __default_chunk_size || __nsort < __count)
+    {
+        __leaf_sort(__xs, __xe, __comp);
+        return;
+    }
+
+    // TODO: the partial sort implementation should
+    // be shared with the other backends.
+
+    // NOTE(review): past the early return above __nsort >= __count always
+    // holds, so `__count <= __nsort` is always true below and the
+    // __parallel_stable_partial_sort branches are not reached.
+    if (omp_in_parallel())
+    {
+        if (__count <= __nsort)
+        {
+            __pstl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
+        }
+        else
+        {
+            __pstl::__omp_backend::__parallel_stable_partial_sort(__tag, __xs, __xe, __comp, __leaf_sort, __nsort);
+        }
+    }
+    else
+    {
+        // Open a parallel region; only one thread of it runs the if/else below.
+        _PSTL_PRAGMA(omp parallel)
+        _PSTL_PRAGMA(omp single nowait)
+        if (__count <= __nsort)
+        {
+            __pstl::__omp_backend::__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
+        }
+        else
+        {
+            __pstl::__omp_backend::__parallel_stable_partial_sort(__tag, __xs, __xe, __comp, __leaf_sort, __nsort);
+        }
+    }
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H

diff  --git a/libcxx/include/pstl/internal/omp/parallel_transform_reduce.h b/libcxx/include/pstl/internal/omp/parallel_transform_reduce.h
new file mode 100644
index 0000000000000..1d4cc0a5974d2
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_transform_reduce.h
@@ -0,0 +1,113 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+//------------------------------------------------------------------------
+// parallel_transform_reduce
+//
+// Notation:
+//      r(i,j,init) returns reduction of init with reduction over [i,j)
+//      u(i) returns f(i,i+1,identity) for a hypothetical left identity element
+//      of r c(x,y) combines values x and y that were the result of r or u
+//------------------------------------------------------------------------
+
+// Task-based transform-reduce over [__first, __last).
+//
+//   __unary_op   called as __unary_op(__first + i) for each i below the number
+//                of threads, to seed one accumulator per thread
+//   __init       value the final result is folded into
+//   __combiner   called as __combiner(__init, accumulator) for every accumulator
+//   __reduction  called as __reduction(first, last, accumulator) on a sub-range
+//
+// Returns __reduction(__first, __last, __init) when the range has no more
+// elements than there are threads, or when the elements after the first
+// `number of threads` form fewer than two chunks. Otherwise the first
+// `number of threads` elements seed the accumulators, the remaining elements
+// are reduced chunk by chunk inside an `omp taskloop`, and the accumulators are
+// folded into __init.
+template <class _RandomAccessIterator, class _UnaryOp, class _Value, class _Combiner, class _Reduction>
+auto
+__transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init,
+                        _Combiner __combiner, _Reduction __reduction)
+{
+    const std::size_t __num_threads = omp_get_num_threads();
+    const std::size_t __size = __last - __first;
+
+    // Check the size before forming __first + __num_threads: for a range shorter
+    // than the number of threads that iterator would lie past __last and the
+    // distance handed to the partitioner would wrap around.
+    if (__size <= __num_threads)
+    {
+        return __reduction(__first, __last, __init);
+    }
+
+    // Partition the elements that follow the per-thread seed elements into chunks.
+    auto __policy = __omp_backend::__chunk_partitioner(__first + __num_threads, __last);
+
+    if (__policy.__n_chunks < 2)
+    {
+        return __reduction(__first, __last, __init);
+    }
+
+    // An OpenMP user-defined reduction cannot be used here because the init
+    // value would have to be stored in the combiner and would then be used
+    // several times, although it must be applied only once. Instead, one
+    // starting accumulator per thread is generated manually.
+    std::vector<_Value> __accums;
+    __accums.reserve(__num_threads);
+
+    // initialize accumulators for all threads
+    for (std::size_t __i = 0; __i < __num_threads; ++__i)
+    {
+        __accums.emplace_back(__unary_op(__first + __i));
+    }
+
+    // main loop
+    _PSTL_PRAGMA(omp taskloop shared(__accums))
+    for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
+    {
+        __pstl::__omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk,
+                                       [&](auto __chunk_first, auto __chunk_last)
+                                       {
+                                           // Each task folds its chunk into the slot of the
+                                           // thread that executes it.
+                                           auto __thread_num = omp_get_thread_num();
+                                           __accums[__thread_num] =
+                                               __reduction(__chunk_first, __chunk_last, __accums[__thread_num]);
+                                       });
+    }
+
+    // combine by accumulators
+    for (std::size_t __i = 0; __i < __num_threads; ++__i)
+    {
+        __init = __combiner(__init, __accums[__i]);
+    }
+
+    return __init;
+}
+
+// OpenMP backend entry point for transform-reduce over [__first, __last).
+//
+// Delegates to __transform_reduce_body and returns its result. The execution
+// policy argument is accepted but not used.
+template <class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryOp, class _Value, class _Combiner,
+          class _Reduction>
+_Value
+__parallel_transform_reduce(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first,
+                            _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init, _Combiner __combiner,
+                            _Reduction __reduction)
+{
+    // Starts as a copy of __init and is overwritten by whichever branch runs.
+    _Value __result = __init;
+    if (omp_in_parallel())
+    {
+        // We don't create a nested parallel region in an existing parallel
+        // region: just create tasks
+        __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner,
+                                                                  __reduction);
+    }
+    else
+    {
+        // Create a parallel region, and a single thread will create tasks
+        // for the region.
+        _PSTL_PRAGMA(omp parallel)
+        _PSTL_PRAGMA(omp single nowait)
+        {
+            __result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner,
+                                                                      __reduction);
+        }
+    }
+
+    return __result;
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H

diff  --git a/libcxx/include/pstl/internal/omp/parallel_transform_scan.h b/libcxx/include/pstl/internal/omp/parallel_transform_scan.h
new file mode 100644
index 0000000000000..f83628994bc21
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/parallel_transform_scan.h
@@ -0,0 +1,32 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H
+#define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H
+
+#include "util.h"
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+// OpenMP backend entry point for transform-scan over __n elements.
+//
+// Not parallelized yet: returns the result of one call to
+// __scan(_Index(0), __n, __init). The execution policy and the unnamed
+// functor parameters are ignored.
+template <class _ExecutionPolicy, class _Index, class _Up, class _Tp, class _Cp, class _Rp, class _Sp>
+_Tp
+__parallel_transform_scan(__pstl::__internal::__openmp_backend_tag, _ExecutionPolicy&&, _Index __n, _Up /* __u */,
+                          _Tp __init, _Cp /* __combine */, _Rp /* __brick_reduce */, _Sp __scan)
+{
+    // TODO: parallelize this function.
+    return __scan(_Index(0), __n, __init);
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+#endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H

diff  --git a/libcxx/include/pstl/internal/omp/util.h b/libcxx/include/pstl/internal/omp/util.h
new file mode 100644
index 0000000000000..c88d9808379cf
--- /dev/null
+++ b/libcxx/include/pstl/internal/omp/util.h
@@ -0,0 +1,173 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_INTERNAL_OMP_UTIL_H
+#define _PSTL_INTERNAL_OMP_UTIL_H
+
+#include <algorithm>
+#include <atomic>
+#include <iterator>
+#include <cstddef>
+#include <cstdio>
+#include <memory>
+#include <vector>
+#include <omp.h>
+
+#include "../parallel_backend_utils.h"
+#include "../unseq_backend_simd.h"
+#include "../utils.h"
+
+// Portability "#pragma" definition.
+// _PSTL_PRAGMA(x) expands to __pragma(x) when _MSC_VER is defined and to
+// _Pragma with the stringized argument otherwise, so a pragma can be emitted
+// from within a macro.
+#ifdef _MSC_VER
+#    define _PSTL_PRAGMA(x) __pragma(x)
+#else
+#    define _PSTL_PRAGMA(x) _Pragma(#    x)
+#endif
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __omp_backend
+{
+
+//------------------------------------------------------------------------
+// Hook used to cancel execution.
+// In the OpenMP backend this is currently a no-op (see the TODO below).
+//------------------------------------------------------------------------
+inline void
+__cancel_execution()
+{
+    // TODO: Figure out how to make cancelation work.
+}
+
+//------------------------------------------------------------------------
+// raw buffer
+//------------------------------------------------------------------------
+
+// Non-copyable owner of storage for __n objects of type _Tp.
+//
+// The constructor obtains the storage from std::allocator<_Tp>::allocate and
+// the destructor returns it with deallocate. Neither of them constructs or
+// destroys any _Tp object.
+template <typename _Tp>
+class __buffer
+{
+    std::allocator<_Tp> __allocator_;
+    // Start of the allocated storage.
+    _Tp* __ptr_;
+    // Number of elements requested at construction; needed for deallocate().
+    const std::size_t __buf_size_;
+    __buffer(const __buffer&) = delete;
+    void
+    operator=(const __buffer&) = delete;
+
+  public:
+    // Allocates storage for __n elements.
+    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}
+
+    // True when the stored pointer is not null.
+    operator bool() const { return __ptr_ != nullptr; }
+
+    // Returns the pointer to the start of the storage.
+    _Tp*
+    get() const
+    {
+        return __ptr_;
+    }
+    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
+};
+
+// Preliminary size of each chunk: requires further discussion.
+// Used as the serial cut-off in __should_run_serial and as the default
+// requested chunk size of __chunk_partitioner.
+inline constexpr std::size_t __default_chunk_size = 2048;
+
+// Convenience function to determine when we should run serial.
+// Iterator overload (selected when _Iterator is not an integral type): returns
+// true when std::distance(__first, __last) is at most __default_chunk_size.
+template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
+constexpr auto
+__should_run_serial(_Iterator __first, _Iterator __last) -> bool
+{
+    using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
+    auto __size = std::distance(__first, __last);
+    // Compare in the iterator's difference type to avoid a signed/unsigned comparison.
+    return __size <= static_cast<_difference_type>(__default_chunk_size);
+}
+
+// Index overload of __should_run_serial (selected when _Index is an integral
+// type): returns true when __last - __first is at most __default_chunk_size.
+template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
+constexpr auto
+__should_run_serial(_Index __first, _Index __last) -> bool
+{
+    using _difference_type = _Index;
+    auto __size = __last - __first;
+    // The chunk size is converted to the index type before the comparison.
+    return __size <= static_cast<_difference_type>(__default_chunk_size);
+}
+
+// Description of how an iteration space is cut into chunks; produced by
+// __chunk_partitioner and consumed by __process_chunk.
+struct __chunk_metrics
+{
+    // Total number of chunks.
+    std::size_t __n_chunks;
+    // Size of every chunk other than chunk 0.
+    std::size_t __chunk_size;
+    // Size of chunk 0.
+    std::size_t __first_chunk_size;
+};
+
+// Partitions the iteration space [__first, __last) according to
+// __requested_chunk_size and returns the resulting __chunk_metrics.
+//
+// A range shorter than the requested chunk size becomes one chunk. Otherwise
+// the range is divided into (length / requested size) + 1 chunks of equal
+// size; elements that do not fit evenly are either given a chunk of their own
+// (when there are exactly as many of them as one chunk holds) or added to the
+// chunks, with whatever still remains going to the first chunk. This avoids a
+// small tail chunk.
+template <class _RandomAccessIterator, class _Size = std::size_t>
+auto
+__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
+                    _Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
+{
+    const _Size __total = __last - __first;
+
+    // Short range: everything goes into a single chunk.
+    if (__total < __requested_chunk_size)
+    {
+        _Size __single = 1;
+        return __chunk_metrics{__single, __total, __total};
+    }
+
+    _Size __count = (__total / __requested_chunk_size) + 1;
+    _Size __regular = __total / __count;
+    const _Size __remainder = __total - (__count * __regular);
+
+    // The remainder fills exactly one more chunk of the regular size.
+    if (__remainder == __regular)
+    {
+        __count += 1;
+        return __chunk_metrics{__count, __regular, __regular};
+    }
+
+    // The range divides evenly: all chunks have the same size.
+    if (__remainder == 0)
+    {
+        return __chunk_metrics{__count, __regular, __regular};
+    }
+
+    // Spread the remainder over all chunks; what cannot be spread evenly is
+    // added to the first chunk.
+    const _Size __extra_per_chunk = __remainder / __count;
+    const _Size __unspread = __remainder - (__extra_per_chunk * __count);
+
+    __regular += __extra_per_chunk;
+    _Size __leading = __regular + __unspread;
+
+    return __chunk_metrics{__count, __regular, __leading};
+}
+
+// Calls __f(first, last) for the sub-range that chunk number __chunk_index
+// occupies, measured from __base and laid out as described by __metrics.
+template <typename _Iterator, typename _Index, typename _Func>
+void
+__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
+{
+    const bool __is_leading_chunk = (__chunk_index == 0);
+
+    // Chunk 0 has its own size; every later chunk is shifted by the amount
+    // chunk 0 exceeds the regular chunk size.
+    auto __length = __is_leading_chunk ? __metrics.__first_chunk_size : __metrics.__chunk_size;
+    auto __offset = __is_leading_chunk ? 0
+                                       : (__chunk_index * __metrics.__chunk_size) +
+                                             (__metrics.__first_chunk_size - __metrics.__chunk_size);
+
+    auto __chunk_begin = __base + __offset;
+    auto __chunk_end = __chunk_begin + __length;
+    __f(__chunk_begin, __chunk_end);
+}
+
+} // namespace __omp_backend
+} // namespace __pstl
+
+#endif // _PSTL_INTERNAL_OMP_UTIL_H

diff  --git a/libcxx/include/pstl/internal/parallel_backend.h b/libcxx/include/pstl/internal/parallel_backend.h
new file mode 100644
index 0000000000000..4da871bfe74f2
--- /dev/null
+++ b/libcxx/include/pstl/internal/parallel_backend.h
@@ -0,0 +1,37 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_PARALLEL_BACKEND_H
+#define _PSTL_PARALLEL_BACKEND_H
+
+#include "pstl_config.h"
+
+// Select the parallel backend: include the header of the backend whose
+// configuration macro is defined and make it reachable as __pstl::__par_backend.
+#if defined(_PSTL_PAR_BACKEND_SERIAL)
+#    include "parallel_backend_serial.h"
+namespace __pstl
+{
+namespace __par_backend = __serial_backend;
+}
+#elif defined(_PSTL_PAR_BACKEND_TBB)
+#    include "parallel_backend_tbb.h"
+namespace __pstl
+{
+namespace __par_backend = __tbb_backend;
+}
+#elif defined(_PSTL_PAR_BACKEND_OPENMP)
+#    include "parallel_backend_omp.h"
+namespace __pstl
+{
+namespace __par_backend = __omp_backend;
+}
+#else
+// No backend macro is defined: no __par_backend alias is declared in this case.
+_PSTL_PRAGMA_MESSAGE("Parallel backend was not specified");
+#endif
+
+#endif /* _PSTL_PARALLEL_BACKEND_H */

diff  --git a/libcxx/include/pstl/internal/parallel_backend_omp.h b/libcxx/include/pstl/internal/parallel_backend_omp.h
new file mode 100644
index 0000000000000..7398cfe55929a
--- /dev/null
+++ b/libcxx/include/pstl/internal/parallel_backend_omp.h
@@ -0,0 +1,58 @@
+// -*- C++ -*-
+// -*-===----------------------------------------------------------------------===//
+//
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_PARALLEL_BACKEND_OMP_H
+#define _PSTL_PARALLEL_BACKEND_OMP_H
+
+//------------------------------------------------------------------------
+// parallel_invoke
+//------------------------------------------------------------------------
+
+#include "./omp/parallel_invoke.h"
+
+//------------------------------------------------------------------------
+// parallel_for
+//------------------------------------------------------------------------
+
+#include "./omp/parallel_for.h"
+
+//------------------------------------------------------------------------
+// parallel_for_each
+//------------------------------------------------------------------------
+
+#include "./omp/parallel_for_each.h"
+
+//------------------------------------------------------------------------
+// parallel_reduce
+//------------------------------------------------------------------------
+
+#include "./omp/parallel_reduce.h"
+#include "./omp/parallel_transform_reduce.h"
+
+//------------------------------------------------------------------------
+// parallel_scan
+//------------------------------------------------------------------------
+
+#include "./omp/parallel_scan.h"
+#include "./omp/parallel_transform_scan.h"
+
+//------------------------------------------------------------------------
+// parallel_stable_sort
+//------------------------------------------------------------------------
+
+#include "./omp/parallel_stable_partial_sort.h"
+#include "./omp/parallel_stable_sort.h"
+
+//------------------------------------------------------------------------
+// parallel_merge
+//------------------------------------------------------------------------
+#include "./omp/parallel_merge.h"
+
+#endif //_PSTL_PARALLEL_BACKEND_OMP_H

diff  --git a/libcxx/include/pstl/internal/parallel_backend_serial.h b/libcxx/include/pstl/internal/parallel_backend_serial.h
new file mode 100644
index 0000000000000..ad2c5fc60ec3f
--- /dev/null
+++ b/libcxx/include/pstl/internal/parallel_backend_serial.h
@@ -0,0 +1,137 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_PARALLEL_BACKEND_SERIAL_H
+#define _PSTL_PARALLEL_BACKEND_SERIAL_H
+
+#include <algorithm>
+#include <cstddef>
+#include <memory>
+#include <numeric>
+#include <utility>
+
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __serial_backend
+{
+
+// Uninitialized storage for __n objects of type _Tp, obtained from std::allocator
+// on construction and handed back on destruction. No _Tp object is constructed
+// or destroyed by this class itself.
+template <typename _Tp>
+class __buffer
+{
+  public:
+    // Allocates room for __n objects.
+    __buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}
+
+    // Returns the storage to the allocator it came from.
+    ~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
+
+    // Not copyable: a copy would deallocate the same storage twice.
+    __buffer(const __buffer&) = delete;
+    void
+    operator=(const __buffer&) = delete;
+
+    // True when a non-null pointer is held.
+    operator bool() const { return nullptr != __ptr_; }
+
+    // Pointer to the first element slot.
+    _Tp*
+    get() const
+    {
+        return __ptr_;
+    }
+
+  private:
+    // Declaration order matters: __allocator_ must exist before __ptr_ is initialized from it.
+    std::allocator<_Tp> __allocator_;
+    _Tp* __ptr_;
+    const std::size_t __buf_size_;
+};
+
+// No-op: the __serial_backend primitives in this file call their functors
+// directly, so there is no in-flight parallel work to cancel.
+inline void
+__cancel_execution()
+{
+}
+
+// Serial stand-in for a parallel for: calls __f exactly once with the whole
+// range [__first, __last). The backend tag and the execution policy are unnamed
+// and unused in the body.
+template <class _ExecutionPolicy, class _Index, class _Fp>
+void
+__parallel_for(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
+{
+    __f(__first, __last);
+}
+
+// Serial reduction over [__first, __last).
+// An empty range yields __identity unchanged; otherwise __real_body is applied
+// once to the whole range, seeded with __identity. The reduction functor is
+// never needed because there is only a single partial result.
+template <class _ExecutionPolicy, class _Value, class _Index, typename _RealBody, typename _Reduction>
+_Value
+__parallel_reduce(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last,
+                  const _Value& __identity, const _RealBody& __real_body, const _Reduction&)
+{
+    // Guard clause for the empty range.
+    if (__first == __last)
+        return __identity;
+
+    return __real_body(__first, __last, __identity);
+}
+
+// Serial transform-reduce: hands the whole range [__first, __last) and __init to
+// __reduce and returns its result. The unary-op and binary-op arguments are
+// unnamed and unused here.
+template <class _ExecutionPolicy, class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce>
+_Tp
+__parallel_transform_reduce(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last,
+                            _UnaryOp, _Tp __init, _BinaryOp, _Reduce __reduce)
+{
+    return __reduce(__first, __last, __init);
+}
+
+// Serial strict scan over the counted interval 0:__n, treated as a single block.
+// Order of calls: __reduce (only if __n is non-zero), then __apex with the
+// combined total, then __scan (only if __n is non-zero) seeded with __initial.
+template <class _ExecutionPolicy, typename _Index, typename _Tp, typename _Rp, typename _Cp, typename _Sp, typename _Ap>
+void
+__parallel_strict_scan(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial,
+                       _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex)
+{
+    _Tp __total = __initial;
+    if (__n)
+    {
+        __total = __combine(__total, __reduce(_Index(0), __n));
+        __apex(__total);
+        __scan(_Index(0), __n, __initial);
+    }
+    else
+    {
+        // Empty input: __apex still runs, with the untouched initial value.
+        __apex(__total);
+    }
+}
+
+// Serial transform-scan: calls __scan once over the index range [0, __n) with
+// __init and returns whatever __scan returns. The unary op, binary op and
+// reduce functor are unnamed and unused here.
+template <class _ExecutionPolicy, class _Index, class _UnaryOp, class _Tp, class _BinaryOp, class _Reduce, class _Scan>
+_Tp
+__parallel_transform_scan(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _Index __n, _UnaryOp,
+                          _Tp __init, _BinaryOp, _Reduce, _Scan __scan)
+{
+    return __scan(_Index(0), __n, __init);
+}
+
+// Serial stable sort: delegates the whole range [__first, __last) to __leaf_sort
+// with __comp. The trailing std::size_t parameter (default 0) is unnamed and
+// ignored in this backend.
+template <class _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
+void
+__parallel_stable_sort(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __first,
+                       _RandomAccessIterator __last, _Compare __comp, _LeafSort __leaf_sort, std::size_t = 0)
+{
+    __leaf_sort(__first, __last, __comp);
+}
+
+// Serial merge: forwards both input ranges, the output iterator and the
+// comparator to __leaf_merge in a single call; no splitting is performed.
+template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2,
+          typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
+void
+__parallel_merge(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __first1,
+                 _RandomAccessIterator1 __last1, _RandomAccessIterator2 __first2, _RandomAccessIterator2 __last2,
+                 _RandomAccessIterator3 __outit, _Compare __comp, _LeafMerge __leaf_merge)
+{
+    __leaf_merge(__first1, __last1, __first2, __last2, __outit, __comp);
+}
+
+// Serial invoke: runs __f1 to completion and then __f2, both in the calling
+// thread. Each callable is forwarded with its original value category.
+template <class _ExecutionPolicy, typename _F1, typename _F2>
+void
+__parallel_invoke(__pstl::__internal::__serial_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2)
+{
+    std::forward<_F1>(__f1)();
+    std::forward<_F2>(__f2)();
+}
+
+} // namespace __serial_backend
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_PARALLEL_BACKEND_SERIAL_H */

diff  --git a/libcxx/include/pstl/internal/parallel_backend_tbb.h b/libcxx/include/pstl/internal/parallel_backend_tbb.h
new file mode 100644
index 0000000000000..e336f69799eb0
--- /dev/null
+++ b/libcxx/include/pstl/internal/parallel_backend_tbb.h
@@ -0,0 +1,1296 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_PARALLEL_BACKEND_TBB_H
+#define _PSTL_PARALLEL_BACKEND_TBB_H
+
+#include <algorithm>
+#include <type_traits>
+
+#include "pstl_config.h"
+#include "parallel_backend_utils.h"
+
+// Bring in minimal required subset of Intel TBB
+#include <tbb/blocked_range.h>
+#include <tbb/parallel_for.h>
+#include <tbb/parallel_reduce.h>
+#include <tbb/parallel_scan.h>
+#include <tbb/parallel_invoke.h>
+#include <tbb/task_arena.h>
+#include <tbb/tbb_allocator.h>
+#include <tbb/task.h>
+
+#if TBB_INTERFACE_VERSION < 10000
+#    error Intel(R) Threading Building Blocks 2018 is required; older versions are not supported.
+#endif
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __tbb_backend
+{
+
+//! Raw memory buffer with automatic freeing and no exceptions.
+/** Some of our algorithms need to start with a raw memory buffer,
+not an initialized array, because initialization/destruction
+would make the span be at least O(N). */
+// tbb::tbb_allocator can improve performance in some cases.
+// NOTE(review): the constructor calls allocate() with no error handling of its
+// own; whether a failed allocation throws or yields a null pointer depends on
+// tbb::tbb_allocator - confirm before relying on operator bool().
+template <typename _Tp>
+class __buffer
+{
+    // Declaration order matters: _M_allocator is initialized before _M_ptr uses it.
+    tbb::tbb_allocator<_Tp> _M_allocator;
+    _Tp* _M_ptr;
+    const std::size_t _M_buf_size;
+    // Not copyable: a copy would deallocate the same storage twice.
+    __buffer(const __buffer&) = delete;
+    void
+    operator=(const __buffer&) = delete;
+
+  public:
+    //! Try to obtain buffer of given size to store objects of _Tp type
+    __buffer(std::size_t n) : _M_allocator(), _M_ptr(_M_allocator.allocate(n)), _M_buf_size(n) {}
+    //! True if buffer was successfully obtained, false otherwise.
+    // nullptr (rather than the NULL macro) matches the serial backend's __buffer.
+    operator bool() const { return _M_ptr != nullptr; }
+    //! Return pointer to buffer, or nullptr if buffer could not be obtained.
+    _Tp*
+    get() const
+    {
+        return _M_ptr;
+    }
+    //! Destroy buffer
+    ~__buffer() { _M_allocator.deallocate(_M_ptr, _M_buf_size); }
+};
+
+// Wrapper for tbb::task
+// Requests cancellation of the task group the caller is currently running in.
+// How that group is reached depends on the TBB interface version.
+inline void
+__cancel_execution()
+{
+#if TBB_INTERFACE_VERSION <= 12000
+    // Legacy task API: go through the currently executing task's group.
+    tbb::task::self().group()->cancel_group_execution();
+#else
+    // Newer TBB: the current context is obtained directly.
+    tbb::task::current_context()->cancel_group_execution();
+#endif
+}
+
+//------------------------------------------------------------------------
+// parallel_for
+//------------------------------------------------------------------------
+
+// Adapter that lets a brick taking (begin, end) serve as a tbb::parallel_for
+// body, which TBB invokes with a tbb::blocked_range instead.
+template <class _Index, class _RealBody>
+class __parallel_for_body
+{
+    _RealBody _M_body; // private copy of the wrapped brick
+
+  public:
+    __parallel_for_body(const _RealBody& __brick) : _M_body(__brick) {}
+    __parallel_for_body(const __parallel_for_body& __other) : _M_body(__other._M_body) {}
+
+    // Unpacks the sub-range into its two endpoints and passes them to the brick.
+    void
+    operator()(const tbb::blocked_range<_Index>& __range) const
+    {
+        _M_body(__range.begin(), __range.end());
+    }
+};
+
+//! Applies brick __f to sub-ranges [i,j) that together cover [__first,__last).
+// Thin wrapper over tbb::parallel_for, run inside tbb::this_task_arena::isolate.
+template <class _ExecutionPolicy, class _Index, class _Fp>
+void
+__parallel_for(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
+{
+    // Everything is captured by value, exactly what the loop needs and nothing more.
+    auto __run = [__first, __last, __f]() {
+        const tbb::blocked_range<_Index> __whole(__first, __last);
+        tbb::parallel_for(__whole, __parallel_for_body<_Index, _Fp>(__f));
+    };
+    tbb::this_task_arena::isolate(__run);
+}
+
+//! Reduces [__first,__last): __real_body folds each sub-range [i,j) into a
+//! partial value and __reduction merges the partial values.
+// Thin wrapper over the functional form of tbb::parallel_reduce, run inside
+// tbb::this_task_arena::isolate.
+template <class _ExecutionPolicy, class _Value, class _Index, typename _RealBody, typename _Reduction>
+_Value
+__parallel_reduce(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last,
+                  const _Value& __identity, const _RealBody& __real_body, const _Reduction& __reduction)
+{
+    return tbb::this_task_arena::isolate([__first, __last, &__identity, &__real_body, &__reduction]() -> _Value {
+        // Adapts the (begin, end, value) brick to TBB's (range, value) calling convention.
+        // The brick is copied into the adapter.
+        auto __on_range = [__real_body](const tbb::blocked_range<_Index>& __r, const _Value& __value) -> _Value {
+            return __real_body(__r.begin(), __r.end(), __value);
+        };
+        return tbb::parallel_reduce(tbb::blocked_range<_Index>(__first, __last), __identity, __on_range,
+                                    __reduction);
+    });
+}
+
+//------------------------------------------------------------------------
+// parallel_transform_reduce
+//
+// Notation:
+//      r(i,j,init) returns reduction of init with reduction over [i,j)
+//      u(i) returns f(i,i+1,identity) for a hypothetical left identity element of r
+//      c(x,y) combines values x and y that were the result of r or u
+//------------------------------------------------------------------------
+
+// Body object for the imperative form of tbb::parallel_reduce used by
+// __parallel_transform_reduce. The running sum lives in raw, manually managed
+// storage so that a body created by splitting can exist without a _Tp value
+// until it has processed its first elements.
+template <class _Index, class _Up, class _Tp, class _Cp, class _Rp>
+struct __par_trans_red_body
+{
+    alignas(_Tp) char _M_sum_storage[sizeof(_Tp)]; // Holds generalized non-commutative sum when has_sum==true
+    _Rp _M_brick_reduce;                           // Most likely to have non-empty layout
+    _Up _M_u;
+    _Cp _M_combine;
+    bool _M_has_sum; // Put last to minimize size of class
+    // Access to the running sum; only valid once a _Tp has been constructed in the storage.
+    _Tp&
+    sum()
+    {
+        __TBB_ASSERT(_M_has_sum, "sum expected");
+        return *(_Tp*)_M_sum_storage;
+    }
+    // Root body: starts with a copy of __init already in place.
+    __par_trans_red_body(_Up __u, _Tp __init, _Cp __c, _Rp __r)
+        : _M_brick_reduce(__r), _M_u(__u), _M_combine(__c), _M_has_sum(true)
+    {
+        new (_M_sum_storage) _Tp(__init);
+    }
+
+    // Splitting constructor: copies the functors but starts without a sum.
+    __par_trans_red_body(__par_trans_red_body& __left, tbb::split)
+        : _M_brick_reduce(__left._M_brick_reduce), _M_u(__left._M_u), _M_combine(__left._M_combine), _M_has_sum(false)
+    {
+    }
+
+    ~__par_trans_red_body()
+    {
+        // 17.6.5.12 tells us to not worry about catching exceptions from destructors.
+        // The stored _Tp is destroyed only if one was ever constructed.
+        if (_M_has_sum)
+            sum().~_Tp();
+    }
+
+    // Merges the sum of __rhs into this body, with this body's sum as the left operand.
+    void
+    join(__par_trans_red_body& __rhs)
+    {
+        sum() = _M_combine(sum(), __rhs.sum());
+    }
+
+    // Processes one sub-range. A body without a sum seeds it from the first two
+    // elements and only then folds the remainder with the brick.
+    void
+    operator()(const tbb::blocked_range<_Index>& __range)
+    {
+        _Index __i = __range.begin();
+        _Index __j = __range.end();
+        if (!_M_has_sum)
+        {
+            __TBB_ASSERT(__range.size() > 1, "there should be at least 2 elements");
+            new (&_M_sum_storage)
+                _Tp(_M_combine(_M_u(__i), _M_u(__i + 1))); // The condition i+1 < j is provided by the grain size of 3
+            // The flag is raised only after construction succeeded, so the destructor
+            // never touches a half-built value.
+            _M_has_sum = true;
+            std::advance(__i, 2);
+            if (__i == __j)
+                return;
+        }
+        sum() = _M_brick_reduce(__i, __j, sum());
+    }
+};
+
+// Transform-reduce over [__first, __last) using the imperative form of
+// tbb::parallel_reduce with a __par_trans_red_body seeded with __init.
+// Returns a copy of the sum held by the body after the reduction finishes.
+template <class _ExecutionPolicy, class _Index, class _Up, class _Tp, class _Cp, class _Rp>
+_Tp
+__parallel_transform_reduce(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __first, _Index __last,
+                            _Up __u, _Tp __init, _Cp __combine, _Rp __brick_reduce)
+{
+    typedef __tbb_backend::__par_trans_red_body<_Index, _Up, _Tp, _Cp, _Rp> _Body;
+    _Body __body(__u, __init, __combine, __brick_reduce);
+
+    // The grain size of 3 is used in order to provide minimum 2 elements for each body
+    const tbb::blocked_range<_Index> __whole(__first, __last, 3);
+    tbb::this_task_arena::isolate([&__whole, &__body]() { tbb::parallel_reduce(__whole, __body); });
+
+    return __body.sum();
+}
+
+//------------------------------------------------------------------------
+// parallel_scan
+//------------------------------------------------------------------------
+
+// Body object for tbb::parallel_scan used by __parallel_transform_scan.
+// As in __par_trans_red_body, the running sum lives in raw storage and exists
+// only while _M_has_sum is true.
+template <class _Index, class _Up, class _Tp, class _Cp, class _Rp, class _Sp>
+class __trans_scan_body
+{
+    alignas(_Tp) char _M_sum_storage[sizeof(_Tp)]; // Holds generalized non-commutative sum when has_sum==true
+    _Rp _M_brick_reduce;                           // Most likely to have non-empty layout
+    _Up _M_u;
+    _Cp _M_combine;
+    _Sp _M_scan;
+    bool _M_has_sum; // Put last to minimize size of class
+  public:
+    // Root body: starts with a copy of __init already in place.
+    __trans_scan_body(_Up __u, _Tp __init, _Cp __combine, _Rp __reduce, _Sp __scan)
+        : _M_brick_reduce(__reduce), _M_u(__u), _M_combine(__combine), _M_scan(__scan), _M_has_sum(true)
+    {
+        new (_M_sum_storage) _Tp(__init);
+    }
+
+    // Splitting constructor: copies the functors but starts without a sum.
+    __trans_scan_body(__trans_scan_body& __b, tbb::split)
+        : _M_brick_reduce(__b._M_brick_reduce), _M_u(__b._M_u), _M_combine(__b._M_combine), _M_scan(__b._M_scan),
+          _M_has_sum(false)
+    {
+    }
+
+    ~__trans_scan_body()
+    {
+        // 17.6.5.12 tells us to not worry about catching exceptions from destructors.
+        // The stored _Tp is destroyed only if one was ever constructed.
+        if (_M_has_sum)
+            sum().~_Tp();
+    }
+
+    // Access to the running sum. Declared const yet returns a mutable reference,
+    // hence the const_cast on the storage.
+    _Tp&
+    sum() const
+    {
+        __TBB_ASSERT(_M_has_sum, "sum expected");
+        return *const_cast<_Tp*>(reinterpret_cast<_Tp const*>(_M_sum_storage));
+    }
+
+    // Pre-scan pass: only accumulates the reduction of the sub-range. A body without
+    // a sum seeds it from the first element before folding the rest.
+    void
+    operator()(const tbb::blocked_range<_Index>& __range, tbb::pre_scan_tag)
+    {
+        _Index __i = __range.begin();
+        _Index __j = __range.end();
+        if (!_M_has_sum)
+        {
+            new (&_M_sum_storage) _Tp(_M_u(__i));
+            // The flag is raised only after construction succeeded.
+            _M_has_sum = true;
+            ++__i;
+            if (__i == __j)
+                return;
+        }
+        sum() = _M_brick_reduce(__i, __j, sum());
+    }
+
+    // Final-scan pass: runs the scan brick over the sub-range starting from the
+    // current sum and stores the value it returns as the new sum.
+    void
+    operator()(const tbb::blocked_range<_Index>& __range, tbb::final_scan_tag)
+    {
+        sum() = _M_scan(__range.begin(), __range.end(), sum());
+    }
+
+    // Merges in the sum of __a, which becomes the LEFT operand of the combination.
+    // If this body has no sum yet, it simply takes a copy of __a's sum.
+    void
+    reverse_join(__trans_scan_body& __a)
+    {
+        if (_M_has_sum)
+        {
+            sum() = _M_combine(__a.sum(), sum());
+        }
+        else
+        {
+            new (&_M_sum_storage) _Tp(__a.sum());
+            _M_has_sum = true;
+        }
+    }
+
+    // Overwrites this body's sum with that of __b; both must already hold a sum.
+    void
+    assign(__trans_scan_body& __b)
+    {
+        sum() = __b.sum();
+    }
+};
+
+// Returns the largest power of two that is strictly less than __m.
+// For __m <= 2 the result is 1.
+template <typename _Index>
+_Index
+__split(_Index __m)
+{
+    _Index __pow = 1;
+    // Keep doubling while the next power of two would still be below __m.
+    for (; 2 * __pow < __m; __pow *= 2)
+    {
+    }
+    return __pow;
+}
+
+//------------------------------------------------------------------------
+// __parallel_strict_scan
+//------------------------------------------------------------------------
+
+// First phase of the strict scan: reduces __m consecutive tiles, starting at tile
+// index __i, and stores the per-tile results in __r[0 .. __m). Every tile has
+// __tilesize elements except the last one, which has __lastsize.
+template <typename _Index, typename _Tp, typename _Rp, typename _Cp>
+void
+__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine)
+{
+    // Leaf: a single tile is reduced directly.
+    if (__m == 1)
+    {
+        __r[0] = __reduce(__i * __tilesize, __lastsize);
+        return;
+    }
+
+    _Index __k = __split(__m);
+    // The left part covers __k tiles, all of them full-sized.
+    auto __left = [=] { __tbb_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); };
+    // The right part covers the remaining tiles and inherits the short last tile.
+    auto __right = [=] {
+        __tbb_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine);
+    };
+    tbb::parallel_invoke(__left, __right);
+
+    // Only when both parts have the same number of tiles is the last slot
+    // folded together with the last slot of the left part.
+    if (__m == 2 * __k)
+        __r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]);
+}
+
+// Second phase of the strict scan: walks the same tile split as __upsweep and
+// calls __scan on every tile, passing the value accumulated to the left of it.
+template <typename _Index, typename _Tp, typename _Cp, typename _Sp>
+void
+__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine,
+            _Sp __scan)
+{
+    // Leaf: a single tile is scanned with the incoming prefix.
+    if (__m == 1)
+    {
+        __scan(__i * __tilesize, __lastsize, __initial);
+        return;
+    }
+
+    const _Index __k = __split(__m);
+    // The left part keeps the incoming prefix unchanged.
+    auto __left = [=] {
+        __tbb_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan);
+    };
+    // The right part starts from the incoming prefix combined with __r[__k - 1].
+    // Assumes that __combine never throws.
+    //TODO: Consider adding a requirement for user functors to be constant.
+    auto __right = [=, &__combine] {
+        __tbb_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize,
+                                   __combine(__initial, __r[__k - 1]), __combine, __scan);
+    };
+    tbb::parallel_invoke(__left, __right);
+}
+
+// Adapted from Intel(R) Cilk(TM) version from cilkpub.
+// Let i:len denote a counted interval of length len starting at i.  s denotes a generalized-sum value.
+// Expected actions of the functors are:
+//     reduce(i,len) -> s  -- return reduction value of i:len.
+//     combine(s1,s2) -> s -- return merged sum
+//     apex(s) -- do any processing necessary between reduce and scan.
+//     scan(i,len,initial) -- perform scan over i:len starting with initial.
+// The initial range 0:n is partitioned into consecutive subranges.
+// reduce and scan are each called exactly once per subrange.
+// Thus callers can rely upon side effects in reduce.
+// combine must not throw an exception.
+// apex is called exactly once, after all calls to reduce and before all calls to scan.
+// For example, it's useful for allocating a __buffer used by scan but whose size is the sum of all reduction values.
+// T must have a trivial constructor and destructor.
+template <class _ExecutionPolicy, typename _Index, typename _Tp, typename _Rp, typename _Cp, typename _Sp, typename _Ap>
+void
+__parallel_strict_scan(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Tp __initial,
+                       _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex)
+{
+    // __combine is captured by reference so that it can be invoked from the
+    // (non-mutable) lambda; everything else is copied.
+    tbb::this_task_arena::isolate([=, &__combine]() {
+        if (__n > 1)
+        {
+            // Tile the input so that there are up to __slack tiles per available thread.
+            _Index __p = tbb::this_task_arena::max_concurrency();
+            const _Index __slack = 4;
+            _Index __tilesize = (__n - 1) / (__slack * __p) + 1;
+            // __m is the index of the last tile, so there are __m + 1 tiles in total.
+            _Index __m = (__n - 1) / __tilesize;
+            __buffer<_Tp> __buf(__m + 1);
+            _Tp* __r = __buf.get();
+            // The last tile holds whatever is left after __m full tiles.
+            __tbb_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce,
+                                     __combine);
+
+            // When __apex is a no-op and __combine has no side effects, a good optimizer
+            // should be able to eliminate all code between here and __apex.
+            // Alternatively, provide a default value for __apex that can be
+            // recognized by metaprogramming that conditionally executes the following.
+            // Clearing the lowest set bit of __k on every step visits exactly the slots
+            // of __r whose partial sums together make up the total of all tiles.
+            std::size_t __k = __m + 1;
+            _Tp __t = __r[__k - 1];
+            while ((__k &= __k - 1))
+                __t = __combine(__r[__k - 1], __t);
+            __apex(__combine(__initial, __t));
+            __tbb_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial,
+                                       __combine, __scan);
+            return;
+        }
+        // Fewer than 2 elements in sequence.  Handle as single block.
+        // (The branch above always returns, so this is reached only for __n <= 1.)
+        _Tp __sum = __initial;
+        if (__n)
+            __sum = __combine(__sum, __reduce(_Index(0), __n));
+        __apex(__sum);
+        if (__n)
+            __scan(_Index(0), __n, __initial);
+    });
+}
+
+// Transform-scan over the index range [0, __n) using tbb::parallel_scan with a
+// __trans_scan_body seeded with __init. Returns a copy of the sum held by the
+// body once the scan has finished.
+template <class _ExecutionPolicy, class _Index, class _Up, class _Tp, class _Cp, class _Rp, class _Sp>
+_Tp
+__parallel_transform_scan(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _Index __n, _Up __u, _Tp __init,
+                          _Cp __combine, _Rp __brick_reduce, _Sp __scan)
+{
+    typedef __trans_scan_body<_Index, _Up, _Tp, _Cp, _Rp, _Sp> _Body;
+    _Body __body(__u, __init, __combine, __brick_reduce, __scan);
+
+    // The range is copied into the closure; the body is shared by reference so
+    // that its final sum can be read back afterwards.
+    tbb::blocked_range<_Index> __whole(0, __n);
+    tbb::this_task_arena::isolate([__whole, &__body]() { tbb::parallel_scan(__whole, __body); });
+
+    return __body.sum();
+}
+
+//------------------------------------------------------------------------
+// parallel_stable_sort
+//------------------------------------------------------------------------
+
+//------------------------------------------------------------------------
+// stable_sort utilities
+//
+// These are used by parallel implementations but do not depend on them.
+//------------------------------------------------------------------------
+#define _PSTL_MERGE_CUT_OFF 2000
+
+template <typename _Func>
+class __func_task;
+template <typename _Func>
+class __root_task;
+
+#if TBB_INTERFACE_VERSION <= 12000
+// Task base class for TBB interface versions up to 12000. It is a thin layer
+// over the legacy tbb::task API that creates __func_task objects wrapping a
+// callable and forwards recycling/spawning requests to tbb::task.
+class __task : public tbb::task
+{
+  public:
+    // Creates a __func_task for __f allocated as the continuation of this task.
+    template <typename _Fn>
+    __task*
+    make_continuation(_Fn&& __f)
+    {
+        return new (allocate_continuation()) __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f));
+    }
+
+    // Creates a __func_task for __f allocated as a child of __parent.
+    template <typename _Fn>
+    __task*
+    make_child_of(__task* __parent, _Fn&& __f)
+    {
+        return new (__parent->allocate_child()) __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f));
+    }
+
+    // Creates a __func_task for __f allocated as an additional child of __parent.
+    template <typename _Fn>
+    __task*
+    make_additional_child_of(tbb::task* __parent, _Fn&& __f)
+    {
+        return new (tbb::task::allocate_additional_child_of(*__parent))
+            __func_task<typename std::decay<_Fn>::type>(std::forward<_Fn>(__f));
+    }
+
+    // The wrappers below forward to the tbb::task member of the same name.
+    inline void
+    recycle_as_continuation()
+    {
+        tbb::task::recycle_as_continuation();
+    }
+
+    inline void
+    recycle_as_child_of(__task* __parent)
+    {
+        tbb::task::recycle_as_child_of(*__parent);
+    }
+
+    inline void
+    spawn(__task* __t)
+    {
+        tbb::task::spawn(*__t);
+    }
+
+    // Spawns the task owned by __root and blocks until it has completed.
+    template <typename _Fn>
+    static inline void
+    spawn_root_and_wait(__root_task<_Fn>& __root)
+    {
+        tbb::task::spawn_root_and_wait(*__root._M_task);
+    }
+};
+
+// Legacy-TBB task that owns a callable of type _Func and runs it when executed.
+template <typename _Func>
+class __func_task : public __task
+{
+    _Func _M_func;
+
+    // Invokes the stored callable with this task; whatever task pointer the
+    // callable returns is handed back to the caller of execute().
+    tbb::task*
+    execute()
+    {
+        return _M_func(this);
+    };
+
+  public:
+    // Stores a copy (or move) of __f as the task body.
+    template <typename _Fn>
+    __func_task(_Fn&& __f) : _M_func{std::forward<_Fn>(__f)}
+    {
+    }
+
+    // Gives access to the stored callable.
+    _Func&
+    body()
+    {
+        return _M_func;
+    }
+};
+
+// Legacy-TBB root task holder: allocates a root __func_task<_Func> whose callable
+// is constructed from the given arguments. The task is started through
+// __task::spawn_root_and_wait, which reads _M_task (hence the friendship).
+template <typename _Func>
+class __root_task
+{
+    tbb::task* _M_task;
+
+  public:
+    // Builds _Func from args and wraps it in a task allocated with allocate_root().
+    template <typename... Args>
+    __root_task(Args&&... args)
+        : _M_task{new (tbb::task::allocate_root()) __func_task<_Func>{_Func(std::forward<Args>(args)...)}}
+    {
+    }
+
+    friend class __task;
+    friend class __func_task<_Func>;
+};
+
+#else  // TBB_INTERFACE_VERSION <= 12000
+// Task base class for TBB interface versions above 12000. Instead of the legacy
+// tbb::task helpers used by the other branch of this #if, it keeps its own
+// parent pointer, child reference count and "recycle" flag.
+// NOTE(review): std::atomic is used below, but <atomic> is not among the headers
+// this file includes directly - confirm it is provided transitively.
+class __task : public tbb::detail::d1::task
+{
+  protected:
+    tbb::detail::d1::small_object_allocator _M_allocator{}; // allocator this task was created with
+    tbb::detail::d1::execution_data* _M_execute_data{};     // set when the task is executed
+    __task* _M_parent{};                                    // task notified when this one finishes
+    std::atomic<int> _M_refcount{};                         // number of outstanding children
+    bool _M_recycle{};                                      // set by recycle_as_*; skips cleanup once
+
+    // Allocates a __func_task wrapping __f and records the allocator in the new
+    // task so that it can be used for deallocation later.
+    // Requires that this task is currently executing (_M_execute_data is set).
+    template <typename _Fn>
+    __task*
+    allocate_func_task(_Fn&& __f)
+    {
+        _PSTL_ASSERT(_M_execute_data != nullptr);
+        tbb::detail::d1::small_object_allocator __alloc{};
+        auto __t =
+            __alloc.new_object<__func_task<typename std::decay<_Fn>::type>>(*_M_execute_data, std::forward<_Fn>(__f));
+        __t->_M_allocator = __alloc;
+        return __t;
+    }
+
+  public:
+    __task*
+    parent()
+    {
+        return _M_parent;
+    }
+
+    void
+    set_ref_count(int __n)
+    {
+        _M_refcount.store(__n, std::memory_order_release);
+    }
+
+    // Creates a task for __f that takes over this task's parent; this task is
+    // detached from the parent in the process.
+    template <typename _Fn>
+    __task*
+    make_continuation(_Fn&& __f)
+    {
+        auto __t = allocate_func_task(std::forward<_Fn>(__f));
+        __t->_M_parent = _M_parent;
+        _M_parent = nullptr;
+        return __t;
+    }
+
+    // Creates a task for __f whose parent is __parent. The parent's reference
+    // count is NOT touched; callers set it separately (see set_ref_count).
+    template <typename _Fn>
+    __task*
+    make_child_of(__task* __parent, _Fn&& __f)
+    {
+        auto __t = allocate_func_task(std::forward<_Fn>(__f));
+        __t->_M_parent = __parent;
+        return __t;
+    }
+
+    // Like make_child_of, but also increments the parent's reference count.
+    template <typename _Fn>
+    __task*
+    make_additional_child_of(__task* __parent, _Fn&& __f)
+    {
+        auto __t = make_child_of(__parent, std::forward<_Fn>(__f));
+        _PSTL_ASSERT(__parent->_M_refcount.load(std::memory_order_relaxed) > 0);
+        ++__parent->_M_refcount;
+        return __t;
+    }
+
+    // Marks this task for reuse; the parent is left as it is.
+    inline void
+    recycle_as_continuation()
+    {
+        _M_recycle = true;
+    }
+
+    // Marks this task for reuse and re-parents it under __parent.
+    inline void
+    recycle_as_child_of(__task* __parent)
+    {
+        _M_recycle = true;
+        _M_parent = __parent;
+    }
+
+    // Spawns __t in the context of the currently executing task.
+    inline void
+    spawn(__task* __t)
+    {
+        _PSTL_ASSERT(_M_execute_data != nullptr);
+        tbb::detail::d1::spawn(*__t, *_M_execute_data->context);
+    }
+
+    // Runs the function task owned by __root and blocks on the root's wait object.
+    template <typename _Fn>
+    static inline void
+    spawn_root_and_wait(__root_task<_Fn>& __root)
+    {
+        tbb::detail::d1::execute_and_wait(*__root._M_func_task, __root._M_context, __root._M_wait_object,
+                                          __root._M_context);
+    }
+
+    template <typename _Func>
+    friend class __func_task;
+};
+
+// Task (newer TBB) that owns a callable of type _Func, runs it when executed and
+// afterwards cleans itself up and notifies its parent, unless it was recycled.
+template <typename _Func>
+class __func_task : public __task
+{
+    _Func _M_func;
+
+    // Runs the stored callable. The execution data is remembered first because
+    // the callable may allocate or spawn further tasks through this object.
+    __task*
+    execute(tbb::detail::d1::execution_data& __ed) override
+    {
+        _M_execute_data = &__ed;
+        _M_recycle = false;
+        __task* __next = _M_func(this);
+        return finalize(__next);
+    }
+
+    // Cancellation path: the callable is not run, but the task is still cleaned up.
+    __task*
+    cancel(tbb::detail::d1::execution_data& __ed) override
+    {
+        // finalize() dereferences _M_execute_data when it deallocates, so record
+        // the data of this call; otherwise it could still be null or stale.
+        _M_execute_data = &__ed;
+        return finalize(nullptr);
+    }
+
+    // Common epilogue of execute() and cancel().
+    // A recycled task stays alive and just hands back __next. Otherwise the task
+    // is destroyed and deallocated, the parent's reference count is decremented,
+    // and the parent is returned when this was its last outstanding child.
+    __task*
+    finalize(__task* __next)
+    {
+        bool __recycle = _M_recycle;
+        _M_recycle = false;
+
+        if (__recycle)
+        {
+            return __next;
+        }
+
+        // Copy what is still needed: the members are gone after the destructor call.
+        auto __parent = _M_parent;
+        auto __alloc = _M_allocator;
+        auto __ed = _M_execute_data;
+
+        this->~__func_task();
+
+        _PSTL_ASSERT(__parent != nullptr);
+        _PSTL_ASSERT(__parent->_M_refcount.load(std::memory_order_relaxed) > 0);
+        const bool __last_child = (--__parent->_M_refcount == 0);
+
+        // The storage is released on every path. Releasing it only when the
+        // parent's count drops to zero would leak every other child task.
+        __alloc.deallocate(this, *__ed);
+
+        if (__last_child)
+        {
+            _PSTL_ASSERT(__next == nullptr);
+            return __parent;
+        }
+
+        return __next;
+    }
+
+    friend class __root_task<_Func>;
+
+  public:
+    // Stores a copy (or move) of __f as the task body.
+    template <typename _Fn>
+    __func_task(_Fn&& __f) : _M_func(std::forward<_Fn>(__f))
+    {
+    }
+
+    // Gives access to the stored callable.
+    _Func&
+    body()
+    {
+        return _M_func;
+    }
+};
+
+// Root of a task tree (newer TBB). It owns the wait object and the task group
+// context used by __task::spawn_root_and_wait, and acts as parent of the first
+// __func_task. When it is itself executed or cancelled, it releases the wait object.
+template <typename _Func>
+class __root_task : public __task
+{
+    // Releases the wait object; __ed is not used.
+    __task*
+    execute(tbb::detail::d1::execution_data& __ed) override
+    {
+        _M_wait_object.release();
+        return nullptr;
+    };
+
+    // Same as execute(): cancellation must release the wait object as well.
+    __task*
+    cancel(tbb::detail::d1::execution_data& __ed) override
+    {
+        _M_wait_object.release();
+        return nullptr;
+    }
+
+    __func_task<_Func>* _M_func_task{};               // first task of the tree, created in the constructor
+    tbb::detail::d1::wait_context _M_wait_object{0};  // the constructor initializes it with 1 instead
+    tbb::task_group_context _M_context{};
+
+  public:
+    // Builds _Func from args, wraps it in a __func_task whose parent is this
+    // root, and sets the root's reference count to 1 for that single child.
+    template <typename... Args>
+    __root_task(Args&&... args) : _M_wait_object{1}
+    {
+        tbb::detail::d1::small_object_allocator __alloc{};
+        _M_func_task = __alloc.new_object<__func_task<_Func>>(_Func(std::forward<Args>(args)...));
+        // Remember the allocator in the task so that the task can free itself later.
+        _M_func_task->_M_allocator = __alloc;
+        _M_func_task->_M_parent = this;
+        _M_refcount.store(1, std::memory_order_relaxed);
+    }
+
+    friend class __task;
+};
+#endif // TBB_INTERFACE_VERSION <= 12000
+
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _Cleanup,
+          typename _LeafMerge>
+class __merge_func
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
+    typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType;
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::value_type _ValueType;
+
+    _RandomAccessIterator1 _M_x_beg;
+    _RandomAccessIterator2 _M_z_beg;
+
+    _SizeType _M_xs, _M_xe;
+    _SizeType _M_ys, _M_ye;
+    _SizeType _M_zs;
+    _Compare _M_comp;
+    _LeafMerge _M_leaf_merge;
+    _SizeType _M_nsort; //number of elements to be sorted for partial_sort algorithm
+
+    static const _SizeType __merge_cut_off = _PSTL_MERGE_CUT_OFF;
+
+    bool _root;   //means a task is merging root task
+    bool _x_orig; //"true" means X(or left ) subrange is in the original container; false - in the buffer
+    bool _y_orig; //"true" means Y(or right) subrange is in the original container; false - in the buffer
+    bool _split; //"true" means a merge task is a split task for parallel merging, the execution logic differs
+
+    bool
+    is_partial() const
+    {
+        return _M_nsort > 0;
+    }
+
+    struct __move_value
+    {
+        template <typename Iterator1, typename Iterator2>
+        void
+        operator()(Iterator1 __x, Iterator2 __z)
+        {
+            *__z = std::move(*__x);
+        }
+    };
+
+    struct __move_value_construct
+    {
+        template <typename Iterator1, typename Iterator2>
+        void
+        operator()(Iterator1 __x, Iterator2 __z)
+        {
+            ::new (std::addressof(*__z)) _ValueType(std::move(*__x));
+        }
+    };
+
+    struct __move_range
+    {
+        template <typename Iterator1, typename Iterator2>
+        Iterator2
+        operator()(Iterator1 __first1, Iterator1 __last1, Iterator2 __first2)
+        {
+            if (__last1 - __first1 < __merge_cut_off)
+                return std::move(__first1, __last1, __first2);
+
+            auto __n = __last1 - __first1;
+            tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off),
+                              [__first1, __first2](const tbb::blocked_range<_SizeType>& __range) {
+                                  std::move(__first1 + __range.begin(), __first1 + __range.end(),
+                                            __first2 + __range.begin());
+                              });
+            return __first2 + __n;
+        }
+    };
+
+    struct __move_range_construct
+    {
+        template <typename Iterator1, typename Iterator2>
+        Iterator2
+        operator()(Iterator1 __first1, Iterator1 __last1, Iterator2 __first2)
+        {
+            if (__last1 - __first1 < __merge_cut_off)
+            {
+                for (; __first1 != __last1; ++__first1, ++__first2)
+                    __move_value_construct()(__first1, __first2);
+                return __first2;
+            }
+
+            auto __n = __last1 - __first1;
+            tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off),
+                              [__first1, __first2](const tbb::blocked_range<_SizeType>& __range) {
+                                  for (auto i = __range.begin(); i != __range.end(); ++i)
+                                      __move_value_construct()(__first1 + i, __first2 + i);
+                              });
+            return __first2 + __n;
+        }
+    };
+
+    struct __cleanup_range
+    {
+        template <typename Iterator>
+        void
+        operator()(Iterator __first, Iterator __last)
+        {
+            if (__last - __first < __merge_cut_off)
+                _Cleanup()(__first, __last);
+            else
+            {
+                auto __n = __last - __first;
+                tbb::parallel_for(tbb::blocked_range<_SizeType>(0, __n, __merge_cut_off),
+                                  [__first](const tbb::blocked_range<_SizeType>& __range) {
+                                      _Cleanup()(__first + __range.begin(), __first + __range.end());
+                                  });
+            }
+        }
+    };
+
+  public:
+    // Stores the description of one merge step.
+    //   __xs, __xe  - bounds of sub-range {x}, used as offsets added to __x_beg / __z_beg
+    //   __ys, __ye  - bounds of sub-range {y}, used the same way
+    //   __zs        - offset of the destination within the buffer
+    //   __comp      - comparator passed on to the leaf merge and to the binary searches
+    //   _Cleanup    - unnamed and unused here; the type is default-constructed where needed
+    //   __leaf_merge, __nsort - stored as given
+    //   __x_beg, __z_beg      - base iterators of the two buffers
+    //   __x_orig, __y_orig    - initial values of the per-sub-range buffer flags
+    //   __root      - true for the root merge task (parent_merge() then yields nullptr)
+    // The task always starts in the "not split" state (_split == false).
+    __merge_func(_SizeType __xs, _SizeType __xe, _SizeType __ys, _SizeType __ye, _SizeType __zs, _Compare __comp,
+                 _Cleanup, _LeafMerge __leaf_merge, _SizeType __nsort, _RandomAccessIterator1 __x_beg,
+                 _RandomAccessIterator2 __z_beg, bool __x_orig, bool __y_orig, bool __root)
+        : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_x_beg(__x_beg), _M_z_beg(__z_beg),
+          _M_comp(__comp), _M_leaf_merge(__leaf_merge), _M_nsort(__nsort), _root(__root),
+          _x_orig(__x_orig), _y_orig(__y_orig), _split(false)
+    {
+    }
+
+    // True when __idx is the start offset of sub-range {x} of this merge task.
+    bool
+    is_left(_SizeType __idx) const
+    {
+        const bool __matches_x_start = (__idx == _M_xs);
+        return __matches_x_start;
+    }
+
+    // Sets the buffer flag of the sub-range identified by __idx: _x_orig when __idx is the
+    // start of {x} (see is_left), _y_orig otherwise.
+    template <typename IndexType>
+    void
+    set_odd(IndexType __idx, bool __on_off)
+    {
+        bool& __flag = is_left(__idx) ? _x_orig : _y_orig;
+        __flag = __on_off;
+    }
+
+    __task*
+    operator()(__task* __self);
+
+  private:
+    // Returns the merge functor held by the parent task of __self, or nullptr when this is
+    // the root merge task.
+    __merge_func*
+    parent_merge(__task* __self) const
+    {
+        if (_root)
+            return nullptr;
+
+        auto* __parent_task = static_cast<__func_task<__merge_func>*>(__self->parent());
+        return &__parent_task->body();
+    }
+    // Returns true when the first element of {y} does not compare less than the last element
+    // of {x}. Both sub-ranges must be non-empty, live in the same buffer, and the task must
+    // not be a partial sort.
+    bool
+    x_less_y()
+    {
+        const auto __nx = (_M_xe - _M_xs);
+        const auto __ny = (_M_ye - _M_ys);
+        _PSTL_ASSERT(__nx > 0 && __ny > 0);
+
+        _PSTL_ASSERT(_x_orig == _y_orig);
+        _PSTL_ASSERT(!is_partial());
+
+        if (!_x_orig)
+        {
+            // Data currently lives in the z buffer.
+            _PSTL_ASSERT(std::is_sorted(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_comp));
+            _PSTL_ASSERT(std::is_sorted(_M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_comp));
+
+            const auto __y_first = _M_z_beg + _M_zs + __nx;
+            return !_M_comp(*__y_first, *(__y_first - 1));
+        }
+
+        // Data currently lives in the x buffer.
+        _PSTL_ASSERT(std::is_sorted(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_comp));
+        _PSTL_ASSERT(std::is_sorted(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_comp));
+
+        const auto __y_first = _M_x_beg + _M_ys;
+        const auto __x_last = _M_x_beg + _M_xe - 1;
+        return !_M_comp(*__y_first, *__x_last);
+    }
+    // Moves sub-range {x} into the other buffer and flips _x_orig.
+    // From the x buffer the elements are move-constructed into z; from the z buffer they are
+    // moved back into x and the z slots are cleaned up afterwards.
+    void
+    move_x_range()
+    {
+        const auto __nx = (_M_xe - _M_xs);
+        const auto __ny = (_M_ye - _M_ys);
+        _PSTL_ASSERT(__nx > 0 && __ny > 0);
+
+        if (!_x_orig)
+        {
+            const auto __z_first = _M_z_beg + _M_zs;
+            const auto __z_last = _M_z_beg + _M_zs + __nx;
+            __move_range()(__z_first, __z_last, _M_x_beg + _M_xs);
+            __cleanup_range()(__z_first, __z_last);
+        }
+        else
+        {
+            __move_range_construct()(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_z_beg + _M_zs);
+        }
+
+        _x_orig = !_x_orig;
+    }
+    // Moves sub-range {y} into the other buffer and flips _y_orig.
+    // In the z buffer, {y} is located right after the __nx elements of {x}.
+    void
+    move_y_range()
+    {
+        const auto __nx = (_M_xe - _M_xs);
+        const auto __ny = (_M_ye - _M_ys);
+
+        if (!_y_orig)
+        {
+            const auto __z_first = _M_z_beg + _M_zs + __nx;
+            const auto __z_last = _M_z_beg + _M_zs + __nx + __ny;
+            __move_range()(__z_first, __z_last, _M_x_beg + _M_ys);
+            __cleanup_range()(__z_first, __z_last);
+        }
+        else
+        {
+            __move_range_construct()(_M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs + __nx);
+        }
+
+        _y_orig = !_y_orig;
+    }
+    // Merges sub-ranges {x} and {y} into the other buffer.
+    // When the combined length exceeds __merge_cut_off the work is handed to split_merging()
+    // and its result is returned; otherwise _M_leaf_merge does the merge here and nullptr is
+    // returned.
+    __task*
+    merge_ranges(__task* __self)
+    {
+        _PSTL_ASSERT(_x_orig == _y_orig); // the two sub-ranges being merged must lie in the same buffer
+
+        const auto __nx = (_M_xe - _M_xs);
+        const auto __ny = (_M_ye - _M_ys);
+        const auto __n = __nx + __ny;
+
+        // need to merge {x} and {y}
+        if (__n > __merge_cut_off)
+            return split_merging(__self);
+
+        //merge to buffer
+        if (_x_orig)
+        {
+            _M_leaf_merge(_M_x_beg + _M_xs, _M_x_beg + _M_xe, _M_x_beg + _M_ys, _M_x_beg + _M_ye, _M_z_beg + _M_zs,
+                          _M_comp, __move_value_construct(), __move_value_construct(), __move_range_construct(),
+                          __move_range_construct());
+            _PSTL_ASSERT(parent_merge(__self)); //not root merging task
+        }
+        //merge to "origin"
+        else
+        {
+            _PSTL_ASSERT(_x_orig == _y_orig);
+
+            _PSTL_ASSERT(is_partial() || std::is_sorted(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_comp));
+            _PSTL_ASSERT(is_partial() || std::is_sorted(_M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_comp));
+
+            // (The redundant inner __nx/__ny declarations that shadowed the outer ones and were
+            // never read have been removed.)
+            _M_leaf_merge(_M_z_beg + _M_xs, _M_z_beg + _M_xe, _M_z_beg + _M_ys, _M_z_beg + _M_ye, _M_x_beg + _M_zs,
+                          _M_comp, __move_value(), __move_value(), __move_range(), __move_range());
+
+            // The buffer copies have been moved from; release them.
+            __cleanup_range()(_M_z_beg + _M_xs, _M_z_beg + _M_xe);
+            __cleanup_range()(_M_z_beg + _M_ys, _M_z_beg + _M_ye);
+        }
+        return nullptr;
+    }
+
+    // Handles two adjacent sub-ranges that live in the same buffer.
+    // If this is not a partial sort and {x} <= {y} already holds (x_less_y), no merge is
+    // needed; otherwise merge_ranges() is invoked. A non-root task reports to its parent in
+    // which buffer its result ends up; the root task makes sure the result lands in the origin.
+    __task*
+    process_ranges(__task* __self)
+    {
+        _PSTL_ASSERT(_x_orig == _y_orig);
+        _PSTL_ASSERT(!_split);
+
+        __merge_func* const __parent = parent_merge(__self);
+
+        //optimization, just for sort algorithm: {x} <= {y} means we already have a solution
+        const bool __in_order = !is_partial() && x_less_y();
+
+        if (__parent != nullptr)
+        {
+            //not root merging task (parent_merge() != nullptr):
+            //in order     -> the data stays where it is, report the current flag;
+            //out of order -> the merge moves it to the other buffer, report the reverted flag.
+            __parent->set_odd(_M_zs, __in_order ? _x_orig : !_x_orig);
+            return __in_order ? nullptr : merge_ranges(__self);
+        }
+
+        //root merging task:
+        //in order and data in the buffer     -> move the solution to the origin;
+        //out of order and data in the origin -> move data to the buffer for the final merge
+        //into the origin.
+        if (__in_order != _x_orig)
+        {
+            move_x_range(); //parallel moving
+            move_y_range(); //parallel moving
+        }
+
+        if (__in_order)
+            return nullptr;
+
+        // need to merge {x} and {y}.
+        return merge_ranges(__self);
+    }
+
+    // Splits this merge task into two merge tasks of the same level.
+    // The longer of {x} and {y} is cut at its midpoint and the matching cut in the other
+    // sub-range is found by binary search. The right halves go to a newly spawned sibling
+    // task; this task is recycled as a continuation, narrowed to the left halves, and returned
+    // so that it runs again. Both halves are flagged _split, which makes operator() go
+    // straight to merge_ranges().
+    __task*
+    split_merging(__task* __self)
+    {
+        _PSTL_ASSERT(_x_orig == _y_orig);
+        const auto __nx = (_M_xe - _M_xs);
+        const auto __ny = (_M_ye - _M_ys);
+
+        _SizeType __xm{};
+        _SizeType __ym{};
+        if (__nx < __ny)
+        {
+            // {y} is longer: cut it in the middle and locate the cut in {x}.
+            __ym = _M_ys + __ny / 2;
+
+            // Search in whichever buffer currently holds the data.
+            if (_x_orig)
+                __xm = std::upper_bound(_M_x_beg + _M_xs, _M_x_beg + _M_xe, *(_M_x_beg + __ym), _M_comp) - _M_x_beg;
+            else
+                __xm = std::upper_bound(_M_z_beg + _M_xs, _M_z_beg + _M_xe, *(_M_z_beg + __ym), _M_comp) - _M_z_beg;
+        }
+        else
+        {
+            // {x} is at least as long: cut it in the middle and locate the cut in {y}.
+            __xm = _M_xs + __nx / 2;
+
+            if (_y_orig)
+                __ym = std::lower_bound(_M_x_beg + _M_ys, _M_x_beg + _M_ye, *(_M_x_beg + __xm), _M_comp) - _M_x_beg;
+            else
+                __ym = std::lower_bound(_M_z_beg + _M_ys, _M_z_beg + _M_ye, *(_M_z_beg + __xm), _M_comp) - _M_z_beg;
+        }
+
+        // Output offset of the right half = number of elements that belong to the left half.
+        auto __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys));
+        __merge_func __right_func(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _Cleanup(), _M_leaf_merge, _M_nsort,
+                                  _M_x_beg, _M_z_beg, _x_orig, _y_orig, _root);
+        __right_func._split = true;
+        // The right half becomes an additional child of this task's parent.
+        auto __merge_task = __self->make_additional_child_of(__self->parent(), std::move(__right_func));
+        __self->spawn(__merge_task);
+        __self->recycle_as_continuation();
+
+        // Shrink this task to the left halves.
+        _M_xe = __xm;
+        _M_ye = __ym;
+        _split = true;
+
+        return __self;
+    }
+};
+
+// Entry point of a merge task.
+//  - A task created by split_merging() (_split == true) merges its sub-ranges directly.
+//  - If {x} and {y} live in the same buffer they are handed to process_ranges().
+//  - Otherwise one of them is first moved so that both share a buffer: the root task moves
+//    the one that is in the origin, a non-root task moves the shorter one.
+// Returns the task to execute next, or nullptr.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename __M_Compare, typename _Cleanup,
+          typename _LeafMerge>
+__task*
+__merge_func<_RandomAccessIterator1, _RandomAccessIterator2, __M_Compare, _Cleanup, _LeafMerge>::
+operator()(__task* __self)
+{
+    //a. split merge task into 2 of the same level; the special logic,
+    //without processing(process_ranges) adjacent sub-ranges x and y
+    if (_split)
+        return merge_ranges(__self);
+
+    //b. General merging of adjacent sub-ranges x and y (with optimization in case of {x} <= {y} )
+
+    //1. x and y are in the even buffer
+    //2. x and y are in the odd buffer
+    if (_x_orig == _y_orig)
+        return process_ranges(__self);
+
+    //3. x is in even buffer, y is in the odd buffer
+    //4. x is in odd buffer, y is in the even buffer
+    if (!parent_merge(__self))
+    { //root merge task
+        if (_x_orig)
+            move_x_range();
+        else
+            move_y_range();
+    }
+    else
+    {
+        const _SizeType __nx = (_M_xe - _M_xs);
+        const _SizeType __ny = (_M_ye - _M_ys);
+        _PSTL_ASSERT(__nx > 0);
+        _PSTL_ASSERT(__ny > 0); // was a duplicated check of __nx
+
+        // Move the shorter sub-range to keep the amount of moved data small.
+        if (__nx < __ny)
+            move_x_range();
+        else
+            move_y_range();
+    }
+
+    return process_ranges(__self);
+}
+
+// Task body for the parallel stable sort: sorts [__xs, __xe), using the range starting at __zs
+// as the matching part of the auxiliary buffer. __x_beg / __z_beg are the starts of the whole
+// sequence and of the whole buffer; they are used to express sub-ranges as offsets.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _LeafSort>
+class __stable_sort_func
+{
+  public:
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
+    typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType;
+
+  private:
+    _RandomAccessIterator1 _M_xs, _M_xe, _M_x_beg;
+    _RandomAccessIterator2 _M_zs, _M_z_beg;
+    _Compare _M_comp;
+    _LeafSort _M_leaf_sort;
+    bool _M_root;
+    _SizeType _M_nsort; //zero or number of elements to be sorted for partial_sort algorithm
+
+  public:
+    __stable_sort_func(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __zs,
+                       bool __root, _Compare __comp, _LeafSort __leaf_sort, _SizeType __nsort,
+                       _RandomAccessIterator1 __x_beg, _RandomAccessIterator2 __z_beg)
+        : _M_xs(__xs), _M_xe(__xe), _M_x_beg(__x_beg), _M_zs(__zs), _M_z_beg(__z_beg), _M_comp(__comp),
+          _M_leaf_sort(__leaf_sort), _M_root(__root), _M_nsort(__nsort)
+    {
+    }
+
+    __task*
+    operator()(__task* __self);
+};
+
+#define _PSTL_STABLE_SORT_CUT_OFF 500
+
+// Sorts [_M_xs, _M_xe).
+// A range of at most _PSTL_STABLE_SORT_CUT_OFF elements is sorted with the leaf sort.
+// A larger range is halved: a __merge_func continuation is created for the two halves, the
+// right half is spawned as a new sort task, and this task is recycled to sort the left half.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _Compare, typename _LeafSort>
+__task*
+__stable_sort_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, _LeafSort>::operator()(__task* __self)
+{
+    typedef __merge_func<_RandomAccessIterator1, _RandomAccessIterator2, _Compare, __utils::__serial_destroy,
+                         __utils::__serial_move_merge>
+        _MergeTaskType;
+
+    const _SizeType __n = _M_xe - _M_xs;
+    const _SizeType __nmerge = _M_nsort > 0 ? _M_nsort : __n;
+    const _SizeType __sort_cut_off = _PSTL_STABLE_SORT_CUT_OFF;
+    if (__n <= __sort_cut_off)
+    {
+        _M_leaf_sort(_M_xs, _M_xe, _M_comp);
+        // The root task is only created for ranges above the cut-off, so a leaf is never the root.
+        _PSTL_ASSERT(!_M_root);
+        return nullptr;
+    }
+
+    const _RandomAccessIterator1 __xm = _M_xs + __n / 2;
+    const _RandomAccessIterator2 __zm = _M_zs + (__xm - _M_xs);
+    // The merge task works on offsets relative to the buffer starts, not on iterators.
+    _MergeTaskType __m(_MergeTaskType(_M_xs - _M_x_beg, __xm - _M_x_beg, __xm - _M_x_beg, _M_xe - _M_x_beg,
+                                      _M_zs - _M_z_beg, _M_comp, __utils::__serial_destroy(),
+                                      __utils::__serial_move_merge(__nmerge), _M_nsort, _M_x_beg, _M_z_beg,
+                                      /*x_orig*/ true, /*y_orig*/ true, /*root*/ _M_root));
+    auto __parent = __self->make_continuation(std::move(__m));
+    // The continuation waits for two children: the spawned right half and this recycled task.
+    __parent->set_ref_count(2);
+    auto __right = __self->make_child_of(
+        __parent, __stable_sort_func(__xm, _M_xe, __zm, false, _M_comp, _M_leaf_sort, _M_nsort, _M_x_beg, _M_z_beg));
+    __self->spawn(__right);
+    __self->recycle_as_child_of(__parent);
+    // This task continues as the (non-root) sorter of the left half.
+    _M_root = false;
+    _M_xe = __xm;
+
+    return __self;
+}
+
+// Stable-sorts [__xs, __xe) with the TBB backend.
+// __nsort is the number of leading elements required in sorted order (partial sort); 0, or a
+// value equal to the range length, requests a full sort. Ranges of at most
+// _PSTL_STABLE_SORT_CUT_OFF elements are sorted serially with __leaf_sort; larger ones use a
+// task tree with a temporary buffer of __n elements. All work runs inside
+// tbb::this_task_arena::isolate.
+template <class _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
+void
+__parallel_stable_sort(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator __xs,
+                       _RandomAccessIterator __xe, _Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0)
+{
+    tbb::this_task_arena::isolate([=, &__nsort]() {
+        //sorting based on task tree and parallel merge
+        typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _ValueType;
+        typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type _DifferenceType;
+        const _DifferenceType __n = __xe - __xs;
+        if (__nsort == __n)
+            __nsort = 0; // 'partial_sort' becomes 'sort'
+
+        const _DifferenceType __sort_cut_off = _PSTL_STABLE_SORT_CUT_OFF;
+        if (__n > __sort_cut_off)
+        {
+            __buffer<_ValueType> __buf(__n);
+            __root_task<__stable_sort_func<_RandomAccessIterator, _ValueType*, _Compare, _LeafSort>> __root{
+                __xs, __xe, __buf.get(), true, __comp, __leaf_sort, __nsort, __xs, __buf.get()};
+            __task::spawn_root_and_wait(__root);
+            return;
+        }
+        //serial sort
+        __leaf_sort(__xs, __xe, __comp);
+    });
+}
+
+//------------------------------------------------------------------------
+// parallel_merge
+//------------------------------------------------------------------------
+// Task body for __parallel_merge: merges [_M_xs, _M_xe) and [_M_ys, _M_ye) into the output
+// starting at _M_zs, using _M_comp for ordering and _M_leaf_merge for small pieces.
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
+          typename _Compare, typename _LeafMerge>
+class __merge_func_static
+{
+    _RandomAccessIterator1 _M_xs, _M_xe; // first input range
+    _RandomAccessIterator2 _M_ys, _M_ye; // second input range
+    _RandomAccessIterator3 _M_zs;        // start of the output
+    _Compare _M_comp;
+    _LeafMerge _M_leaf_merge;
+
+  public:
+    // Stores all arguments as given; no work is done until operator() runs.
+    __merge_func_static(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys,
+                        _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp,
+                        _LeafMerge __leaf_merge)
+        : _M_xs(__xs), _M_xe(__xe), _M_ys(__ys), _M_ye(__ye), _M_zs(__zs), _M_comp(__comp), _M_leaf_merge(__leaf_merge)
+    {
+    }
+
+    // Defined out of line below.
+    __task*
+    operator()(__task* __self);
+};
+
+// Merges the two stored input ranges into the output.
+// Inputs of at most _PSTL_MERGE_CUT_OFF elements in total are merged with the leaf merge.
+// Larger inputs are split: the longer range is cut at its midpoint, the matching cut in the
+// other range is found by binary search, the right parts are spawned as a sibling task and
+// this task is recycled to merge the left parts.
+//TODO: consider usage of parallel_for with a custom blocked_range
+template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
+          typename __M_Compare, typename _LeafMerge>
+__task*
+__merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3, __M_Compare, _LeafMerge>::
+operator()(__task* __self)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
+    typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType;
+    const _SizeType __n = (_M_xe - _M_xs) + (_M_ye - _M_ys);
+    const _SizeType __merge_cut_off = _PSTL_MERGE_CUT_OFF;
+    if (__n <= __merge_cut_off)
+    {
+        _M_leaf_merge(_M_xs, _M_xe, _M_ys, _M_ye, _M_zs, _M_comp);
+        return nullptr;
+    }
+
+    _RandomAccessIterator1 __xm;
+    _RandomAccessIterator2 __ym;
+    if (_M_xe - _M_xs < _M_ye - _M_ys)
+    {
+        // {y} is longer: cut it in the middle and locate the cut in {x}.
+        __ym = _M_ys + (_M_ye - _M_ys) / 2;
+        __xm = std::upper_bound(_M_xs, _M_xe, *__ym, _M_comp);
+    }
+    else
+    {
+        // {x} is at least as long: cut it in the middle and locate the cut in {y}.
+        __xm = _M_xs + (_M_xe - _M_xs) / 2;
+        __ym = std::lower_bound(_M_ys, _M_ye, *__xm, _M_comp);
+    }
+    // The right parts are written after all elements of the left parts.
+    const _RandomAccessIterator3 __zm = _M_zs + ((__xm - _M_xs) + (__ym - _M_ys));
+    auto __right = __self->make_additional_child_of(
+        __self->parent(), __merge_func_static(__xm, _M_xe, __ym, _M_ye, __zm, _M_comp, _M_leaf_merge));
+    __self->spawn(__right);
+    __self->recycle_as_continuation();
+    // Shrink this task to the left parts.
+    _M_xe = __xm;
+    _M_ye = __ym;
+
+    return __self;
+}
+
+// Merges [__xs, __xe) and [__ys, __ye) into the output starting at __zs with the TBB backend.
+// Inputs of at most _PSTL_MERGE_CUT_OFF elements in total are merged serially by calling
+// __leaf_merge; larger inputs run a __merge_func_static task tree inside
+// tbb::this_task_arena::isolate.
+template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2,
+          typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
+void
+__parallel_merge(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _RandomAccessIterator1 __xs,
+                 _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye,
+                 _RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge)
+{
+    typedef typename std::iterator_traits<_RandomAccessIterator1>::difference_type _DifferenceType1;
+    typedef typename std::iterator_traits<_RandomAccessIterator2>::difference_type _DifferenceType2;
+    typedef typename std::common_type<_DifferenceType1, _DifferenceType2>::type _SizeType;
+    const _SizeType __n = (__xe - __xs) + (__ye - __ys);
+    const _SizeType __merge_cut_off = _PSTL_MERGE_CUT_OFF;
+    if (__n <= __merge_cut_off)
+    {
+        // Fall back on serial merge
+        __leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp);
+    }
+    else
+    {
+        tbb::this_task_arena::isolate([=]() {
+            typedef __merge_func_static<_RandomAccessIterator1, _RandomAccessIterator2, _RandomAccessIterator3,
+                                        _Compare, _LeafMerge>
+                _TaskType;
+            __root_task<_TaskType> __root{__xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge};
+            __task::spawn_root_and_wait(__root);
+        });
+    }
+}
+
+//------------------------------------------------------------------------
+// parallel_invoke
+//------------------------------------------------------------------------
+// Runs __f1 and __f2 through tbb::parallel_invoke, wrapped in tbb::this_task_arena::isolate.
+template <class _ExecutionPolicy, typename _F1, typename _F2>
+void
+__parallel_invoke(__pstl::__internal::__tbb_backend_tag, _ExecutionPolicy&&, _F1&& __f1, _F2&& __f2)
+{
+    //TODO: a version of tbb::this_task_arena::isolate with variadic arguments pack should be added in the future
+    auto __invoke_both = [&]() { tbb::parallel_invoke(std::forward<_F1>(__f1), std::forward<_F2>(__f2)); };
+    tbb::this_task_arena::isolate(__invoke_both);
+}
+
+} // namespace __tbb_backend
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_PARALLEL_BACKEND_TBB_H */

diff  --git a/libcxx/include/pstl/internal/parallel_backend_utils.h b/libcxx/include/pstl/internal/parallel_backend_utils.h
new file mode 100644
index 0000000000000..e176d7e935184
--- /dev/null
+++ b/libcxx/include/pstl/internal/parallel_backend_utils.h
@@ -0,0 +1,263 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_PARALLEL_BACKEND_UTILS_H
+#define _PSTL_PARALLEL_BACKEND_UTILS_H
+
+#include <iterator>
+#include <utility>
+#include "utils.h"
+
+#include "pstl_config.h"
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+
+namespace __utils
+{
+
+//! Destroy sequence [__zs,__ze), walking from the last element back to the first
+struct __serial_destroy
+{
+    template <typename _RandomAccessIterator>
+    void
+    operator()(_RandomAccessIterator __zs, _RandomAccessIterator __ze)
+    {
+        typedef typename std::iterator_traits<_RandomAccessIterator>::value_type _ValueType;
+        for (; __zs != __ze;)
+        {
+            // Step back first: __ze is one past the next element to destroy.
+            --__ze;
+            (*__ze).~_ValueType();
+        }
+    }
+};
+
+//! Merge sequences [__xs,__xe) and [__ys,__ye) to the output sequence of
+//! (__xe-__xs)+(__ye-__ys) elements starting at __zs, using the supplied move functors.
+//! Only the first _M_nmerge output elements are chosen by comparison; once that many have
+//! been written, whatever is left of both inputs is moved over sequence-wise without
+//! further comparisons.
+struct __serial_move_merge
+{
+    // Number of output elements to produce by comparison before switching to bulk moves.
+    const std::size_t _M_nmerge;
+
+    explicit __serial_move_merge(std::size_t __nmerge) : _M_nmerge(__nmerge) {}
+    // __move_value_x / __move_value_y move a single element (source iterator, destination
+    // iterator); __move_sequence_x / __move_sequence_y move a whole range and return the end
+    // of the destination. Which of the x/y variants is used depends on whether the current
+    // output position is still within the first __nx output slots.
+    template <class _RandomAccessIterator1, class _RandomAccessIterator2, class _RandomAccessIterator3, class _Compare,
+              class _MoveValueX, class _MoveValueY, class _MoveSequenceX, class _MoveSequenceY>
+    void
+    operator()(_RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys,
+               _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp, _MoveValueX __move_value_x,
+               _MoveValueY __move_value_y, _MoveSequenceX __move_sequence_x, _MoveSequenceY __move_sequence_y)
+    {
+        // When the x and y functors have the same type, the x variant is used unconditionally.
+        constexpr bool __same_move_val = std::is_same<_MoveValueX, _MoveValueY>::value;
+        constexpr bool __same_move_seq = std::is_same<_MoveSequenceX, _MoveSequenceY>::value;
+
+        // Remaining number of elements to be placed by comparison.
+        auto __n = _M_nmerge;
+        _PSTL_ASSERT(__n > 0);
+
+        auto __nx = __xe - __xs;
+        //auto __ny = __ye - __ys;
+        _RandomAccessIterator3 __zs_beg = __zs;
+
+        if (__xs != __xe)
+        {
+            if (__ys != __ye)
+            {
+                for (;;)
+                {
+                    if (__comp(*__ys, *__xs))
+                    {
+                        // The y element is strictly smaller: it goes out first.
+                        // NOTE(review): unlike the branch below, this test does not include
+                        // `__same_move_val ||` - confirm whether that is intentional.
+                        const auto __i = __zs - __zs_beg;
+                        if (__i < __nx)
+                            __move_value_x(__ys, __zs);
+                        else
+                            __move_value_y(__ys, __zs);
+                        ++__zs, --__n;
+                        if (++__ys == __ye)
+                        {
+                            // {y} is exhausted; the rest of {x} is moved after the loop.
+                            break;
+                        }
+                        else if (__n == 0)
+                        {
+                            // Quota reached: move the rest of {y} in bulk; the rest of {x}
+                            // follows after the loop.
+                            const auto __j = __zs - __zs_beg;
+                            if (__same_move_seq || __j < __nx)
+                                __zs = __move_sequence_x(__ys, __ye, __zs);
+                            else
+                                __zs = __move_sequence_y(__ys, __ye, __zs);
+                            break;
+                        }
+                    }
+                    else
+                    {
+                        // The x element is not greater: it goes out first, so ties keep x before y.
+                        const auto __i = __zs - __zs_beg;
+                        if (__same_move_val || __i < __nx)
+                            __move_value_x(__xs, __zs);
+                        else
+                            __move_value_y(__xs, __zs);
+                        ++__zs, --__n;
+                        if (++__xs == __xe)
+                        {
+                            // {x} is exhausted: move the rest of {y} and finish.
+                            const auto __j = __zs - __zs_beg;
+                            if (__same_move_seq || __j < __nx)
+                                __move_sequence_x(__ys, __ye, __zs);
+                            else
+                                __move_sequence_y(__ys, __ye, __zs);
+                            return;
+                        }
+                        else if (__n == 0)
+                        {
+                            // Quota reached: move the rest of {x}, then the rest of {y}, and finish.
+                            const auto __j = __zs - __zs_beg;
+                            if (__same_move_seq || __j < __nx)
+                            {
+                                __zs = __move_sequence_x(__xs, __xe, __zs);
+                                __move_sequence_x(__ys, __ye, __zs);
+                            }
+                            else
+                            {
+                                __zs = __move_sequence_y(__xs, __xe, __zs);
+                                __move_sequence_y(__ys, __ye, __zs);
+                            }
+                            return;
+                        }
+                    }
+                }
+            }
+            // Reuse the y variables to describe what is left of {x} for the final move below.
+            __ys = __xs;
+            __ye = __xe;
+        }
+        // Move the single remaining range (the rest of {x}, or all of {y} when {x} was empty).
+        const auto __i = __zs - __zs_beg;
+        if (__same_move_seq || __i < __nx)
+            __move_sequence_x(__ys, __ye, __zs);
+        else
+            __move_sequence_y(__ys, __ye, __zs);
+    }
+};
+
+// Copy-constructs the set union of the sorted ranges [__first1, __last1) and
+// [__first2, __last2) into the storage starting at __result (placement new) and returns the
+// end of the output. Elements equivalent in both ranges are taken from the first range.
+// __cc_range copy-constructs the tail left over once one input is exhausted.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare,
+          typename _CopyConstructRange>
+_OutputIterator
+__set_union_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                      _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                      _CopyConstructRange __cc_range)
+{
+    using _Tp = typename std::iterator_traits<_OutputIterator>::value_type;
+
+    while (__first1 != __last1)
+    {
+        // Second range exhausted: the rest of the first range is the rest of the union.
+        if (__first2 == __last2)
+            return __cc_range(__first1, __last1, __result);
+
+        if (!__comp(*__first2, *__first1))
+        {
+            // *__first1 is not greater: emit it, and skip *__first2 too if both are equivalent.
+            ::new (std::addressof(*__result)) _Tp(*__first1);
+            if (!__comp(*__first1, *__first2))
+                ++__first2;
+            ++__first1;
+        }
+        else
+        {
+            ::new (std::addressof(*__result)) _Tp(*__first2);
+            ++__first2;
+        }
+        ++__result;
+    }
+    return __cc_range(__first2, __last2, __result);
+}
+
+// Copy-constructs the set intersection of the sorted ranges [__first1, __last1) and
+// [__first2, __last2) into the storage starting at __result (placement new) and returns the
+// end of the output. Common elements are copied from the first range.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare>
+_OutputIterator
+__set_intersection_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                             _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp)
+{
+    using _Tp = typename std::iterator_traits<_OutputIterator>::value_type;
+
+    while (__first1 != __last1 && __first2 != __last2)
+    {
+        if (__comp(*__first1, *__first2))
+        {
+            // *__first1 is smaller, so it cannot be in the second range.
+            ++__first1;
+            continue;
+        }
+
+        if (!__comp(*__first2, *__first1))
+        {
+            // Equivalent elements: emit one and advance both ranges.
+            ::new (std::addressof(*__result)) _Tp(*__first1);
+            ++__result;
+            ++__first1;
+        }
+        ++__first2;
+    }
+    return __result;
+}
+
+// Copy-constructs the set difference (elements of the sorted range [__first1, __last1) that
+// are not matched in the sorted range [__first2, __last2)) into the storage starting at
+// __result (placement new) and returns the end of the output.
+// __cc_range copy-constructs the tail of the first range once the second one is exhausted.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare,
+          typename _CopyConstructRange>
+_OutputIterator
+__set_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                           _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                           _CopyConstructRange __cc_range)
+{
+    using _Tp = typename std::iterator_traits<_OutputIterator>::value_type;
+
+    for (; __first1 != __last1;)
+    {
+        if (__first2 == __last2)
+            return __cc_range(__first1, __last1, __result);
+
+        if (__comp(*__first1, *__first2))
+        {
+            // *__first1 is smaller than everything left in the second range: keep it.
+            ::new (std::addressof(*__result)) _Tp(*__first1);
+            ++__result;
+            ++__first1;
+        }
+        else
+        {
+            // Equivalent elements cancel each other out; otherwise only *__first2 is skipped.
+            if (!__comp(*__first2, *__first1))
+                ++__first1;
+            ++__first2;
+        }
+    }
+    return __result;
+}
+// Copy-constructs the symmetric difference of the sorted ranges [__first1, __last1) and
+// [__first2, __last2) (elements found in exactly one of them) into the storage starting at
+// __result (placement new) and returns the end of the output.
+// __cc_range copy-constructs the tail left over once one input is exhausted.
+template <typename _ForwardIterator1, typename _ForwardIterator2, typename _OutputIterator, typename _Compare,
+          typename _CopyConstructRange>
+_OutputIterator
+__set_symmetric_difference_construct(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2,
+                                     _ForwardIterator2 __last2, _OutputIterator __result, _Compare __comp,
+                                     _CopyConstructRange __cc_range)
+{
+    using _Tp = typename std::iterator_traits<_OutputIterator>::value_type;
+
+    for (; __first1 != __last1;)
+    {
+        if (__first2 == __last2)
+            return __cc_range(__first1, __last1, __result);
+
+        if (__comp(*__first1, *__first2))
+        {
+            // Only in the first range.
+            ::new (std::addressof(*__result)) _Tp(*__first1);
+            ++__result;
+            ++__first1;
+        }
+        else
+        {
+            if (__comp(*__first2, *__first1))
+            {
+                // Only in the second range.
+                ::new (std::addressof(*__result)) _Tp(*__first2);
+                ++__result;
+            }
+            else
+                ++__first1; // equivalent in both ranges: drop both
+            ++__first2;
+        }
+    }
+    return __cc_range(__first2, __last2, __result);
+}
+
+} // namespace __utils
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_PARALLEL_BACKEND_UTILS_H */

diff  --git a/libcxx/include/pstl/internal/parallel_impl.h b/libcxx/include/pstl/internal/parallel_impl.h
new file mode 100644
index 0000000000000..76b3f43e0d523
--- /dev/null
+++ b/libcxx/include/pstl/internal/parallel_impl.h
@@ -0,0 +1,90 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_PARALLEL_IMPL_H
+#define _PSTL_PARALLEL_IMPL_H
+
+#include "pstl_config.h"
+
+#include <atomic>
+// This header defines the minimum set of parallel routines required to support Parallel STL,
+// implemented on top of Intel(R) Threading Building Blocks (Intel(R) TBB) library
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+//------------------------------------------------------------------------
+// parallel_find
+//-----------------------------------------------------------------------
+/** Return extremum value returned by brick f[i,j) for subranges [i,j) of [first,last)
+Each f[i,j) must return a value in [i,j), or j when nothing was found in that subrange.
+
+The best position found so far is kept as an offset from __first in a shared atomic.
+__comp(a, b) decides whether offset a is preferable to offset b. __b_first selects the
+initial "nothing found" value: __n (the range length) when true, -1 otherwise.
+Returns __first + (best offset), or __last when no brick found anything. */
+template <class _BackendTag, class _ExecutionPolicy, class _Index, class _Brick, class _Compare>
+_Index
+__parallel_find(_BackendTag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f,
+                _Compare __comp, bool __b_first)
+{
+    typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
+    const _DifferenceType __n = __last - __first;
+    _DifferenceType __initial_dist = __b_first ? __n : -1;
+    std::atomic<_DifferenceType> __extremum(__initial_dist);
+    // TODO: find out what is better here: parallel_for or parallel_reduce
+    __par_backend::__parallel_for(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                  [__comp, __f, __first, &__extremum](_Index __i, _Index __j)
+                                  {
+                                      // See "Reducing Contention Through Priority Updates", PPoPP '13, for discussion of
+                                      // why using a shared variable scales fairly well in this situation.
+                                      // Skip subranges that cannot improve on the current extremum.
+                                      if (__comp(__i - __first, __extremum))
+                                      {
+                                          _Index __res = __f(__i, __j);
+                                          // If not '__last' returned then we found what we want so put this to extremum
+                                          if (__res != __j)
+                                          {
+                                              const _DifferenceType __k = __res - __first;
+                                              // Retry until the update succeeds or another thread has
+                                              // stored an offset that __k no longer beats.
+                                              for (_DifferenceType __old = __extremum; __comp(__k, __old);
+                                                   __old = __extremum)
+                                              {
+                                                  __extremum.compare_exchange_weak(__old, __k);
+                                              }
+                                          }
+                                      }
+                                  });
+    return __extremum != __initial_dist ? __first + __extremum : __last;
+}
+
+//------------------------------------------------------------------------
+// parallel_or
+//------------------------------------------------------------------------
+//! Return true if brick f[i,j) returns true for some subrange [i,j) of [first,last).
+//! Once a match is recorded, remaining subranges skip the brick and cancellation of the
+//! parallel execution is requested.
+template <class _BackendTag, class _ExecutionPolicy, class _Index, class _Brick>
+bool
+__parallel_or(_BackendTag __tag, _ExecutionPolicy&& __exec, _Index __first, _Index __last, _Brick __f)
+{
+    std::atomic<bool> __found(false);
+    __par_backend::__parallel_for(__tag, std::forward<_ExecutionPolicy>(__exec), __first, __last,
+                                  [__f, &__found](_Index __i, _Index __j)
+                                  {
+                                      // A match was already recorded: nothing left to do.
+                                      if (__found.load(std::memory_order_relaxed))
+                                          return;
+                                      if (!__f(__i, __j))
+                                          return;
+
+                                      __found.store(true, std::memory_order_relaxed);
+                                      __par_backend::__cancel_execution();
+                                  });
+    return __found;
+}
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_PARALLEL_IMPL_H */

diff  --git a/libcxx/include/pstl/internal/pstl_config.h b/libcxx/include/pstl/internal/pstl_config.h
new file mode 100644
index 0000000000000..de89e7bd977e7
--- /dev/null
+++ b/libcxx/include/pstl/internal/pstl_config.h
@@ -0,0 +1,204 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_CONFIG_H
+#define _PSTL_CONFIG_H
+
+#include <__pstl_config_site>
+
+// The version is XXYYZ, where XX is major, YY is minor, and Z is patch (i.e. XX.YY.Z)
+#define _PSTL_VERSION 16000
+#define _PSTL_VERSION_MAJOR (_PSTL_VERSION / 1000)
+#define _PSTL_VERSION_MINOR ((_PSTL_VERSION % 1000) / 10)
+#define _PSTL_VERSION_PATCH (_PSTL_VERSION % 10)
+
+#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) && !defined(_PSTL_PAR_BACKEND_OPENMP)
+#    error "A parallel backend must be specified"
+#endif
+
+// Check the user-defined macro for warnings
+#if defined(PSTL_USAGE_WARNINGS)
+#    define _PSTL_USAGE_WARNINGS
+#endif
+
+#if defined(_LIBCPP_VERSION)
+#    include <__assert>
+#    define _PSTL_ASSERT(pred) _LIBCPP_ASSERT(pred, "")
+#elif defined(__GLIBCXX__)
+#    define _PSTL_ASSERT(pred) __glibcxx_assert(pred)
+#else
+#    include <cassert>
+#    define _PSTL_ASSERT(pred) (assert((pred)))
+#endif
+
+// Portability "#pragma" definition
+#ifdef _MSC_VER
+#    define _PSTL_PRAGMA(x) __pragma(x)
+#else
+#    define _PSTL_PRAGMA(x) _Pragma(#    x)
+#endif
+
+#define _PSTL_STRING_AUX(x) #x
+#define _PSTL_STRING(x) _PSTL_STRING_AUX(x)
+#define _PSTL_STRING_CONCAT(x, y) x #y
+
+#ifdef _PSTL_HIDE_FROM_ABI_PER_TU
+#    define _PSTL_HIDE_FROM_ABI_PUSH                                                                                   \
+        _Pragma("clang attribute push(__attribute__((internal_linkage)), apply_to=any(function,record))")
+#    define _PSTL_HIDE_FROM_ABI_POP _Pragma("clang attribute pop")
+#else
+#    define _PSTL_HIDE_FROM_ABI_PUSH /* nothing */
+#    define _PSTL_HIDE_FROM_ABI_POP  /* nothing */
+#endif
+
+// note that when ICC or Clang is in use, _PSTL_GCC_VERSION might not fully match
+// the actual GCC version on the system.
+#define _PSTL_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+
+#if defined(__clang__)
+// according to clang documentation, version can be vendor specific
+#    define _PSTL_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
+#endif
+
+// Enable SIMD for compilers that support OpenMP 4.0
+#if (defined(_OPENMP) && _OPENMP >= 201307) || \
+    (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600) || \
+    (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \
+    defined(__clang__)
+#    define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(omp simd)
+#    define _PSTL_PRAGMA_DECLARE_SIMD _PSTL_PRAGMA(omp declare simd)
+#    define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(omp simd reduction(PRM))
+#elif !defined(_MSC_VER) //#pragma simd
+#    define _PSTL_PRAGMA_SIMD _PSTL_PRAGMA(simd)
+#    define _PSTL_PRAGMA_DECLARE_SIMD
+#    define _PSTL_PRAGMA_SIMD_REDUCTION(PRM) _PSTL_PRAGMA(simd reduction(PRM))
+#else //no simd
+#    define _PSTL_PRAGMA_SIMD
+#    define _PSTL_PRAGMA_DECLARE_SIMD
+#    define _PSTL_PRAGMA_SIMD_REDUCTION(PRM)
+#endif //Enable SIMD
+
+#if defined(__INTEL_COMPILER)
+#    define _PSTL_PRAGMA_FORCEINLINE _PSTL_PRAGMA(forceinline)
+#else
+#    define _PSTL_PRAGMA_FORCEINLINE
+#endif
+
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900
+#    define _PSTL_PRAGMA_SIMD_SCAN(PRM) _PSTL_PRAGMA(omp simd reduction(inscan, PRM))
+#    define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan inclusive(PRM))
+#    define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM) _PSTL_PRAGMA(omp scan exclusive(PRM))
+#else
+#    define _PSTL_PRAGMA_SIMD_SCAN(PRM)
+#    define _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(PRM)
+#    define _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(PRM)
+#endif
+
+// Should be defined to 1 for environments with a vendor implementation of C++17 execution policies
+#define _PSTL_CPP17_EXECUTION_POLICIES_PRESENT (_MSC_VER >= 1912 && _MSVC_LANG >= 201703L) ||                          \
+    (_GLIBCXX_RELEASE >= 9 && __GLIBCXX__ >= 20190503 && __cplusplus >= 201703L)
+
+#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+    __cplusplus >= 201300L || \
+    __cpp_lib_robust_nonmodifying_seq_ops == 201304
+#   define _PSTL_CPP14_2RANGE_MISMATCH_EQUAL_PRESENT
+#endif
+#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
+    __cplusplus >= 201402L || \
+    __cpp_lib_make_reverse_iterator == 201402
+#   define _PSTL_CPP14_MAKE_REVERSE_ITERATOR_PRESENT
+#endif
+#if (defined(_MSC_VER) && _MSC_VER >= 1900) || __cplusplus >= 201402L
+#   define _PSTL_CPP14_INTEGER_SEQUENCE_PRESENT
+#endif
+#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1700) || \
+    (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 190023918) || \
+    __cplusplus >= 201402L
+#   define _PSTL_CPP14_VARIABLE_TEMPLATES_PRESENT
+#endif
+
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800
+#   define _PSTL_EARLYEXIT_PRESENT
+#   define _PSTL_MONOTONIC_PRESENT
+#endif
+
+#if (defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900) || \
+    (!defined(__INTEL_COMPILER) && _PSTL_GCC_VERSION >= 40900) || \
+    (defined(_OPENMP) && _OPENMP >= 201307)
+#    define _PSTL_UDR_PRESENT
+#endif
+
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1900 && __INTEL_COMPILER_BUILD_DATE >= 20180626
+#   define _PSTL_UDS_PRESENT
+#endif
+
+#if defined(_PSTL_EARLYEXIT_PRESENT)
+#    define _PSTL_PRAGMA_SIMD_EARLYEXIT _PSTL_PRAGMA(omp simd early_exit)
+#else
+#    define _PSTL_PRAGMA_SIMD_EARLYEXIT
+#endif
+
+#if defined(_PSTL_MONOTONIC_PRESENT)
+#    define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM) _PSTL_PRAGMA(omp ordered simd monotonic(PRM))
+#    define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2) _PSTL_PRAGMA(omp ordered simd monotonic(PRM1, PRM2))
+#else
+#    define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(PRM)
+#    define _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(PRM1, PRM2)
+#endif
+
+// Declaration of reduction functor, where
+// NAME - the name of the functor
+// OP - type of the callable object with the reduction operation
+// omp_in - refers to the local partial result
+// omp_out - refers to the final value of the combiner operator
+// omp_priv - refers to the private copy of the initial value
+// omp_orig - refers to the original variable to be reduced
+#define _PSTL_PRAGMA_DECLARE_REDUCTION(NAME, OP)                                                                       \
+    _PSTL_PRAGMA(omp declare reduction(NAME:OP : omp_out(omp_in)) initializer(omp_priv = omp_orig))
+
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1600
+#    define _PSTL_PRAGMA_VECTOR_UNALIGNED _PSTL_PRAGMA(vector unaligned)
+#else
+#    define _PSTL_PRAGMA_VECTOR_UNALIGNED
+#endif
+
+// Check the user-defined macro to use non-temporal stores
+#if defined(PSTL_USE_NONTEMPORAL_STORES) && (__INTEL_COMPILER >= 1600)
+#    define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED _PSTL_PRAGMA(vector nontemporal)
+#else
+#    define _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
+#endif
+
+#if defined(_MSC_VER) || defined(__INTEL_COMPILER) // these preprocessors don't print a message location
+#    define _PSTL_PRAGMA_LOCATION __FILE__ ":" _PSTL_STRING(__LINE__) ": [Parallel STL message]: "
+#else
+#    define _PSTL_PRAGMA_LOCATION " [Parallel STL message]: "
+#endif
+
+#define _PSTL_PRAGMA_MESSAGE_IMPL(x) _PSTL_PRAGMA(message(_PSTL_STRING_CONCAT(_PSTL_PRAGMA_LOCATION, x)))
+
+#if defined(_PSTL_USAGE_WARNINGS)
+#    define _PSTL_PRAGMA_MESSAGE(x) _PSTL_PRAGMA_MESSAGE_IMPL(x)
+#    define _PSTL_PRAGMA_MESSAGE_POLICIES(x) _PSTL_PRAGMA_MESSAGE_IMPL(x)
+#else
+#    define _PSTL_PRAGMA_MESSAGE(x)
+#    define _PSTL_PRAGMA_MESSAGE_POLICIES(x)
+#endif
+
+// broken macros
+#if (defined(__GLIBCXX__) && __GLIBCXX__ < 20150716) || \
+    (defined(_MSC_VER) && _MSC_VER < 1800)
+#   define _PSTL_CPP11_STD_ROTATE_BROKEN
+#endif
+
+#if defined(__INTEL_COMPILER) && __INTEL_COMPILER == 1800
+#   define _PSTL_ICC_18_OMP_SIMD_BROKEN
+#endif
+
+#endif /* _PSTL_CONFIG_H */

diff --git a/libcxx/include/pstl/internal/unseq_backend_simd.h b/libcxx/include/pstl/internal/unseq_backend_simd.h
new file mode 100644
index 0000000000000..af2a143bc58ab
--- /dev/null
+++ b/libcxx/include/pstl/internal/unseq_backend_simd.h
@@ -0,0 +1,862 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_UNSEQ_BACKEND_SIMD_H
+#define _PSTL_UNSEQ_BACKEND_SIMD_H
+
+#include <type_traits>
+
+#include "pstl_config.h"
+#include "utils.h"
+
+// This header defines the minimum set of vector routines required
+// to support parallel STL.
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __unseq_backend
+{
+
+// Expect vector width up to 64 bytes (i.e. 512 bits)
+const std::size_t __lane_size = 64;
+
+template <class _Iterator, class _DifferenceType, class _Function>
+_Iterator
+__simd_walk_1(_Iterator __first, _DifferenceType __n, _Function __f) noexcept
+{
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        __f(__first[__i]);
+
+    return __first + __n;
+}
+
+template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Function>
+_Iterator2
+__simd_walk_2(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Function __f) noexcept
+{
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        __f(__first1[__i], __first2[__i]);
+    return __first2 + __n;
+}
+
+template <class _Iterator1, class _DifferenceType, class _Iterator2, class _Iterator3, class _Function>
+_Iterator3
+__simd_walk_3(_Iterator1 __first1, _DifferenceType __n, _Iterator2 __first2, _Iterator3 __first3,
+              _Function __f) noexcept
+{
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        __f(__first1[__i], __first2[__i], __first3[__i]);
+    return __first3 + __n;
+}
+
+// TODO: check whether __simd_first() can be used here
+template <class _Index, class _DifferenceType, class _Pred>
+bool
+__simd_or(_Index __first, _DifferenceType __n, _Pred __pred) noexcept
+{
+#if defined(_PSTL_EARLYEXIT_PRESENT)
+    _DifferenceType __i;
+    _PSTL_PRAGMA_VECTOR_UNALIGNED
+    _PSTL_PRAGMA_SIMD_EARLYEXIT
+    for (__i = 0; __i < __n; ++__i)
+        if (__pred(__first[__i]))
+            break;
+    return __i < __n;
+#else
+    _DifferenceType __block_size = 4 < __n ? 4 : __n;
+    const _Index __last = __first + __n;
+    while (__last != __first)
+    {
+        int32_t __flag = 1;
+        _PSTL_PRAGMA_SIMD_REDUCTION(& : __flag)
+        for (_DifferenceType __i = 0; __i < __block_size; ++__i)
+            if (__pred(*(__first + __i)))
+                __flag = 0;
+        if (!__flag)
+            return true;
+
+        __first += __block_size;
+        if (__last - __first >= __block_size << 1)
+        {
+            // Double the block size.  Any unnecessary iterations can be amortized against work done so far.
+            __block_size <<= 1;
+        }
+        else
+        {
+            __block_size = __last - __first;
+        }
+    }
+    return false;
+#endif
+}
+
+template <class _Index, class _DifferenceType, class _Compare>
+_Index
+__simd_first(_Index __first, _DifferenceType __begin, _DifferenceType __end, _Compare __comp) noexcept
+{
+#if defined(_PSTL_EARLYEXIT_PRESENT)
+    _DifferenceType __i = __begin;
+    _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part
+        _PSTL_PRAGMA_SIMD_EARLYEXIT for (; __i < __end; ++__i)
+    {
+        if (__comp(__first, __i))
+        {
+            break;
+        }
+    }
+    return __first + __i;
+#else
+    // Experiments show good block sizes like this
+    const _DifferenceType __block_size = 8;
+    alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
+    while (__end - __begin >= __block_size)
+    {
+        _DifferenceType __found = 0;
+        _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part
+            _PSTL_PRAGMA_SIMD_REDUCTION(|
+                                        : __found) for (_DifferenceType __i = __begin; __i < __begin + __block_size;
+                                                        ++__i)
+        {
+            const _DifferenceType __t = __comp(__first, __i);
+            __lane[__i - __begin] = __t;
+            __found |= __t;
+        }
+        if (__found)
+        {
+            _DifferenceType __i;
+            // This will vectorize
+            for (__i = 0; __i < __block_size; ++__i)
+            {
+                if (__lane[__i])
+                {
+                    break;
+                }
+            }
+            return __first + __begin + __i;
+        }
+        __begin += __block_size;
+    }
+
+    //Keep remainder scalar
+    while (__begin != __end)
+    {
+        if (__comp(__first, __begin))
+        {
+            return __first + __begin;
+        }
+        ++__begin;
+    }
+    return __first + __end;
+#endif //_PSTL_EARLYEXIT_PRESENT
+}
+
+template <class _Index1, class _DifferenceType, class _Index2, class _Pred>
+std::pair<_Index1, _Index2>
+__simd_first(_Index1 __first1, _DifferenceType __n, _Index2 __first2, _Pred __pred) noexcept
+{
+#if defined(_PSTL_EARLYEXIT_PRESENT)
+    _DifferenceType __i = 0;
+    _PSTL_PRAGMA_VECTOR_UNALIGNED
+    _PSTL_PRAGMA_SIMD_EARLYEXIT
+    for (; __i < __n; ++__i)
+        if (__pred(__first1[__i], __first2[__i]))
+            break;
+    return std::make_pair(__first1 + __i, __first2 + __i);
+#else
+    const _Index1 __last1 = __first1 + __n;
+    const _Index2 __last2 = __first2 + __n;
+    // Experiments show good block sizes like this
+    const _DifferenceType __block_size = 8;
+    alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
+    while (__last1 - __first1 >= __block_size)
+    {
+        _DifferenceType __found = 0;
+        _DifferenceType __i;
+        _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part
+            _PSTL_PRAGMA_SIMD_REDUCTION(|
+                                        : __found) for (__i = 0; __i < __block_size; ++__i)
+        {
+            const _DifferenceType __t = __pred(__first1[__i], __first2[__i]);
+            __lane[__i] = __t;
+            __found |= __t;
+        }
+        if (__found)
+        {
+            _DifferenceType __i2;
+            // This will vectorize
+            for (__i2 = 0; __i2 < __block_size; ++__i2)
+            {
+                if (__lane[__i2])
+                    break;
+            }
+            return std::make_pair(__first1 + __i2, __first2 + __i2);
+        }
+        __first1 += __block_size;
+        __first2 += __block_size;
+    }
+
+    //Keep remainder scalar
+    for (; __last1 != __first1; ++__first1, ++__first2)
+        if (__pred(*(__first1), *(__first2)))
+            return std::make_pair(__first1, __first2);
+
+    return std::make_pair(__last1, __last2);
+#endif //_PSTL_EARLYEXIT_PRESENT
+}
+
+template <class _Index, class _DifferenceType, class _Pred>
+_DifferenceType
+__simd_count(_Index __index, _DifferenceType __n, _Pred __pred) noexcept
+{
+    _DifferenceType __count = 0;
+    _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        if (__pred(*(__index + __i)))
+            ++__count;
+
+    return __count;
+}
+
+template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _BinaryPredicate>
+_OutputIterator
+__simd_unique_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator __result,
+                   _BinaryPredicate __pred) noexcept
+{
+    if (__n == 0)
+        return __result;
+
+    _DifferenceType __cnt = 1;
+    __result[0] = __first[0];
+
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 1; __i < __n; ++__i)
+    {
+        _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1)
+        if (!__pred(__first[__i], __first[__i - 1]))
+        {
+            __result[__cnt] = __first[__i];
+            ++__cnt;
+        }
+    }
+    return __result + __cnt;
+}
+
+template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner>
+_OutputIterator
+__simd_assign(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _Assigner __assigner) noexcept
+{
+    _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        __assigner(__first + __i, __result + __i);
+    return __result + __n;
+}
+
+template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _UnaryPredicate>
+_OutputIterator
+__simd_copy_if(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, _UnaryPredicate __pred) noexcept
+{
+    _DifferenceType __cnt = 0;
+
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+    {
+        _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1)
+        if (__pred(__first[__i]))
+        {
+            __result[__cnt] = __first[__i];
+            ++__cnt;
+        }
+    }
+    return __result + __cnt;
+}
+
+template <class _InputIterator, class _DifferenceType, class _BinaryPredicate>
+_DifferenceType
+__simd_calc_mask_2(_InputIterator __first, _DifferenceType __n, bool* __mask, _BinaryPredicate __pred) noexcept
+{
+    _DifferenceType __count = 0;
+
+    _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+    {
+        __mask[__i] = !__pred(__first[__i], __first[__i - 1]);
+        __count += __mask[__i];
+    }
+    return __count;
+}
+
+template <class _InputIterator, class _DifferenceType, class _UnaryPredicate>
+_DifferenceType
+__simd_calc_mask_1(_InputIterator __first, _DifferenceType __n, bool* __mask, _UnaryPredicate __pred) noexcept
+{
+    _DifferenceType __count = 0;
+
+    _PSTL_PRAGMA_SIMD_REDUCTION(+ : __count)
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+    {
+        __mask[__i] = __pred(__first[__i]);
+        __count += __mask[__i];
+    }
+    return __count;
+}
+
+template <class _InputIterator, class _DifferenceType, class _OutputIterator, class _Assigner>
+void
+__simd_copy_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator __result, bool* __mask,
+                    _Assigner __assigner) noexcept
+{
+    _DifferenceType __cnt = 0;
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+    {
+        if (__mask[__i])
+        {
+            _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1)
+            {
+                __assigner(__first + __i, __result + __cnt);
+                ++__cnt;
+            }
+        }
+    }
+}
+
+template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2>
+void
+__simd_partition_by_mask(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true,
+                         _OutputIterator2 __out_false, bool* __mask) noexcept
+{
+    _DifferenceType __cnt_true = 0, __cnt_false = 0;
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+    {
+        _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true : 1, __cnt_false : 1)
+        if (__mask[__i])
+        {
+            __out_true[__cnt_true] = __first[__i];
+            ++__cnt_true;
+        }
+        else
+        {
+            __out_false[__cnt_false] = __first[__i];
+            ++__cnt_false;
+        }
+    }
+}
+
+template <class _Index, class _DifferenceType, class _Tp>
+_Index
+__simd_fill_n(_Index __first, _DifferenceType __n, const _Tp& __value) noexcept
+{
+    _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        __first[__i] = __value;
+    return __first + __n;
+}
+
+template <class _Index, class _DifferenceType, class _Generator>
+_Index
+__simd_generate_n(_Index __first, _DifferenceType __size, _Generator __g) noexcept
+{
+    _PSTL_USE_NONTEMPORAL_STORES_IF_ALLOWED
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __size; ++__i)
+        __first[__i] = __g();
+    return __first + __size;
+}
+
+template <class _Index, class _BinaryPredicate>
+_Index
+__simd_adjacent_find(_Index __first, _Index __last, _BinaryPredicate __pred, bool __or_semantic) noexcept
+{
+    if (__last - __first < 2)
+        return __last;
+
+    typedef typename std::iterator_traits<_Index>::difference_type _DifferenceType;
+    _DifferenceType __i = 0;
+
+#if defined(_PSTL_EARLYEXIT_PRESENT)
+    //Some compiler versions fail to compile the following loop when iterators are used. Indices are used instead
+    const _DifferenceType __n = __last - __first - 1;
+    _PSTL_PRAGMA_VECTOR_UNALIGNED
+    _PSTL_PRAGMA_SIMD_EARLYEXIT
+    for (; __i < __n; ++__i)
+        if (__pred(__first[__i], __first[__i + 1]))
+            break;
+
+    return __i < __n ? __first + __i : __last;
+#else
+    // Experiments show good block sizes like this
+    //TODO: to consider tuning block_size for various data types
+    const _DifferenceType __block_size = 8;
+    alignas(__lane_size) _DifferenceType __lane[__block_size] = {0};
+    while (__last - __first >= __block_size)
+    {
+        _DifferenceType __found = 0;
+        _PSTL_PRAGMA_VECTOR_UNALIGNED // Do not generate peel loop part
+            _PSTL_PRAGMA_SIMD_REDUCTION(|
+                                        : __found) for (__i = 0; __i < __block_size - 1; ++__i)
+        {
+            //TODO: to improve SIMD vectorization
+            const _DifferenceType __t = __pred(*(__first + __i), *(__first + __i + 1));
+            __lane[__i] = __t;
+            __found |= __t;
+        }
+
+        //Process a pair of elements on a boundary of a data block
+        if (__first + __block_size < __last && __pred(*(__first + __i), *(__first + __i + 1)))
+            __lane[__i] = __found = 1;
+
+        if (__found)
+        {
+            if (__or_semantic)
+                return __first;
+
+            // This will vectorize
+            for (__i = 0; __i < __block_size; ++__i)
+                if (__lane[__i])
+                    break;
+            return __first + __i; //As long as __found is true, a result (__lane[__i] is true) is guaranteed
+        }
+        __first += __block_size;
+    }
+    //Process the remaining elements
+    for (; __last - __first > 1; ++__first)
+        if (__pred(*__first, *(__first + 1)))
+            return __first;
+
+    return __last;
+#endif
+}
+
+// It was created to reduce the code inside std::enable_if
+template <typename _Tp, typename _BinaryOperation>
+using is_arithmetic_plus = std::integral_constant<bool, std::is_arithmetic<_Tp>::value &&
+                                                            std::is_same<_BinaryOperation, std::plus<_Tp>>::value>;
+
+template <typename _DifferenceType, typename _Tp, typename _BinaryOperation, typename _UnaryOperation>
+typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, _Tp>::type
+__simd_transform_reduce(_DifferenceType __n, _Tp __init, _BinaryOperation, _UnaryOperation __f) noexcept
+{
+    _PSTL_PRAGMA_SIMD_REDUCTION(+ : __init)
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+        __init += __f(__i);
+    return __init;
+}
+
+template <typename _Size, typename _Tp, typename _BinaryOperation, typename _UnaryOperation>
+typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, _Tp>::type
+__simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept
+{
+    const _Size __block_size = __lane_size / sizeof(_Tp);
+    if (__n > 2 * __block_size && __block_size > 1)
+    {
+        alignas(__lane_size) char __lane_[__lane_size];
+        _Tp* __lane = reinterpret_cast<_Tp*>(__lane_);
+
+        // initializer
+        _PSTL_PRAGMA_SIMD
+        for (_Size __i = 0; __i < __block_size; ++__i)
+        {
+            ::new (__lane + __i) _Tp(__binary_op(__f(__i), __f(__block_size + __i)));
+        }
+        // main loop
+        _Size __i = 2 * __block_size;
+        const _Size last_iteration = __block_size * (__n / __block_size);
+        for (; __i < last_iteration; __i += __block_size)
+        {
+            _PSTL_PRAGMA_SIMD
+            for (_Size __j = 0; __j < __block_size; ++__j)
+            {
+                __lane[__j] = __binary_op(__lane[__j], __f(__i + __j));
+            }
+        }
+        // remainder
+        _PSTL_PRAGMA_SIMD
+        for (_Size __j = 0; __j < __n - last_iteration; ++__j)
+        {
+            __lane[__j] = __binary_op(__lane[__j], __f(last_iteration + __j));
+        }
+        // combiner
+        for (_Size __j = 0; __j < __block_size; ++__j)
+        {
+            __init = __binary_op(__init, __lane[__j]);
+        }
+        // destroyer
+        _PSTL_PRAGMA_SIMD
+        for (_Size __j = 0; __j < __block_size; ++__j)
+        {
+            __lane[__j].~_Tp();
+        }
+    }
+    else
+    {
+        for (_Size __i = 0; __i < __n; ++__i)
+        {
+            __init = __binary_op(__init, __f(__i));
+        }
+    }
+    return __init;
+}
+
+// Exclusive scan for "+" and arithmetic types
+template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
+          class _BinaryOperation>
+typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
+__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
+            _BinaryOperation, /*Inclusive*/ std::false_type)
+{
+    _PSTL_PRAGMA_SIMD_SCAN(+ : __init)
+    for (_Size __i = 0; __i < __n; ++__i)
+    {
+        __result[__i] = __init;
+        _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init)
+        __init += __unary_op(__first[__i]);
+    }
+    return std::make_pair(__result + __n, __init);
+}
+
+// Since we cannot call __binary_op in the "combiner", we create a wrapper over _Tp to encapsulate __binary_op
+template <typename _Tp, typename _BinaryOp>
+struct _Combiner
+{
+    _Tp __value;
+    _BinaryOp* __bin_op; // Held as a pointer (rather than a reference) because the default ctor must be able to null it
+
+    _Combiner() : __value{}, __bin_op(nullptr) {}
+    _Combiner(const _Tp& value, const _BinaryOp* bin_op) : __value(value), __bin_op(const_cast<_BinaryOp*>(bin_op)) {}
+    _Combiner(const _Combiner& __obj) : __value{}, __bin_op(__obj.__bin_op) {}
+
+    void
+    operator()(const _Combiner& __obj)
+    {
+        __value = (*__bin_op)(__value, __obj.__value);
+    }
+};
+
+// Exclusive scan for other binary operations and types
+template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
+          class _BinaryOperation>
+typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
+__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
+            _BinaryOperation __binary_op, /*Inclusive*/ std::false_type)
+{
+    typedef _Combiner<_Tp, _BinaryOperation> _CombinerType;
+    _CombinerType __init_{__init, &__binary_op};
+
+    _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType)
+
+    _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_)
+    for (_Size __i = 0; __i < __n; ++__i)
+    {
+        __result[__i] = __init_.__value;
+        _PSTL_PRAGMA_SIMD_EXCLUSIVE_SCAN(__init_)
+        _PSTL_PRAGMA_FORCEINLINE
+        __init_.__value = __binary_op(__init_.__value, __unary_op(__first[__i]));
+    }
+    return std::make_pair(__result + __n, __init_.__value);
+}
+
+// Inclusive scan for "+" and arithmetic types
+template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
+          class _BinaryOperation>
+typename std::enable_if<is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
+__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
+            _BinaryOperation, /*Inclusive*/ std::true_type)
+{
+    _PSTL_PRAGMA_SIMD_SCAN(+ : __init)
+    for (_Size __i = 0; __i < __n; ++__i)
+    {
+        __init += __unary_op(__first[__i]);
+        _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init)
+        __result[__i] = __init;
+    }
+    return std::make_pair(__result + __n, __init);
+}
+
+// Inclusive scan for other binary operations and types
+template <class _InputIterator, class _Size, class _OutputIterator, class _UnaryOperation, class _Tp,
+          class _BinaryOperation>
+typename std::enable_if<!is_arithmetic_plus<_Tp, _BinaryOperation>::value, std::pair<_OutputIterator, _Tp>>::type
+__simd_scan(_InputIterator __first, _Size __n, _OutputIterator __result, _UnaryOperation __unary_op, _Tp __init,
+            _BinaryOperation __binary_op, std::true_type)
+{
+    typedef _Combiner<_Tp, _BinaryOperation> _CombinerType;
+    _CombinerType __init_{__init, &__binary_op};
+
+    _PSTL_PRAGMA_DECLARE_REDUCTION(__bin_op, _CombinerType)
+
+    _PSTL_PRAGMA_SIMD_SCAN(__bin_op : __init_)
+    for (_Size __i = 0; __i < __n; ++__i)
+    {
+        _PSTL_PRAGMA_FORCEINLINE
+        __init_.__value = __binary_op(__init_.__value, __unary_op(__first[__i]));
+        _PSTL_PRAGMA_SIMD_INCLUSIVE_SCAN(__init_)
+        __result[__i] = __init_.__value;
+    }
+    return std::make_pair(__result + __n, __init_.__value);
+}
+
+// [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
+// complexity [violation] - We will have at most (__n-1 + number_of_lanes) comparisons instead of at most __n-1.
+template <typename _ForwardIterator, typename _Size, typename _Compare>
+_ForwardIterator
+__simd_min_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept
+{
+    if (__n == 0)
+    {
+        return __first;
+    }
+
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType;
+    struct _ComplexType
+    {
+        _ValueType __min_val;
+        _Size __min_ind;
+        _Compare* __min_comp;
+
+        _ComplexType() : __min_val{}, __min_ind{}, __min_comp(nullptr) {}
+        _ComplexType(const _ValueType& val, const _Compare* comp)
+            : __min_val(val), __min_ind(0), __min_comp(const_cast<_Compare*>(comp))
+        {
+        }
+        _ComplexType(const _ComplexType& __obj)
+            : __min_val(__obj.__min_val), __min_ind(__obj.__min_ind), __min_comp(__obj.__min_comp)
+        {
+        }
+
+        _PSTL_PRAGMA_DECLARE_SIMD
+        void
+        operator()(const _ComplexType& __obj)
+        {
+            if (!(*__min_comp)(__min_val, __obj.__min_val) &&
+                ((*__min_comp)(__obj.__min_val, __min_val) || __obj.__min_ind - __min_ind < 0))
+            {
+                __min_val = __obj.__min_val;
+                __min_ind = __obj.__min_ind;
+            }
+        }
+    };
+
+    _ComplexType __init{*__first, &__comp};
+
+    _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType)
+
+    _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init)
+    for (_Size __i = 1; __i < __n; ++__i)
+    {
+        const _ValueType __min_val = __init.__min_val;
+        const _ValueType __current = __first[__i];
+        if (__comp(__current, __min_val))
+        {
+            __init.__min_val = __current;
+            __init.__min_ind = __i;
+        }
+    }
+    return __first + __init.__min_ind;
+}
+
+// [restriction] - std::iterator_traits<_ForwardIterator>::value_type should be DefaultConstructible.
+// complexity [violation] - We will have at most (2*(__n-1) + 4*number_of_lanes) comparisons instead of at most [1.5*(__n-1)].
+template <typename _ForwardIterator, typename _Size, typename _Compare>
+std::pair<_ForwardIterator, _ForwardIterator>
+__simd_minmax_element(_ForwardIterator __first, _Size __n, _Compare __comp) noexcept // returns {min, max} positions among the __n elements at __first
+{
+    if (__n == 0)
+    {
+        return std::make_pair(__first, __first); // empty range: both iterators are __first
+    }
+    typedef typename std::iterator_traits<_ForwardIterator>::value_type _ValueType;
+
+    struct _ComplexType // reduction state: running min/max values, their indices, and the comparator
+    {
+        _ValueType __min_val;
+        _ValueType __max_val;
+        _Size __min_ind;
+        _Size __max_ind;
+        _Compare* __minmax_comp; // non-owning; set from the caller's __comp below, nullptr when default-constructed
+
+        _ComplexType() : __min_val{}, __max_val{}, __min_ind{}, __max_ind{}, __minmax_comp(nullptr) {}
+        _ComplexType(const _ValueType& min_val, const _ValueType& max_val, const _Compare* comp)
+            : __min_val(min_val), __max_val(max_val), __min_ind(0), __max_ind(0),
+              __minmax_comp(const_cast<_Compare*>(comp))
+        {
+        }
+        _ComplexType(const _ComplexType& __obj)
+            : __min_val(__obj.__min_val), __max_val(__obj.__max_val), __min_ind(__obj.__min_ind),
+              __max_ind(__obj.__max_ind), __minmax_comp(__obj.__minmax_comp)
+        {
+        }
+
+        void
+        operator()(const _ComplexType& __obj) // merges the partial result __obj into *this
+        {
+            // min
+            if ((*__minmax_comp)(__obj.__min_val, __min_val))
+            {
+                __min_val = __obj.__min_val;
+                __min_ind = __obj.__min_ind;
+            }
+            else if (!(*__minmax_comp)(__min_val, __obj.__min_val)) // equivalent minima: pick one index
+            {
+                __min_val = __obj.__min_val;
+                __min_ind = (__min_ind - __obj.__min_ind < 0) ? __min_ind : __obj.__min_ind; // lower index wins if the difference is signed; NOTE(review): never < 0 for an unsigned _Size of int width or wider - confirm
+            }
+
+            // max
+            if ((*__minmax_comp)(__max_val, __obj.__max_val))
+            {
+                __max_val = __obj.__max_val;
+                __max_ind = __obj.__max_ind;
+            }
+            else if (!(*__minmax_comp)(__obj.__max_val, __max_val)) // equivalent maxima: pick one index
+            {
+                __max_val = __obj.__max_val;
+                __max_ind = (__max_ind - __obj.__max_ind < 0) ? __obj.__max_ind : __max_ind; // higher index wins if the difference is signed; NOTE(review): same signedness assumption as for __min_ind
+            }
+        }
+    };
+
+    _ComplexType __init{*__first, *__first, &__comp}; // seed both min and max with element 0
+
+    _PSTL_PRAGMA_DECLARE_REDUCTION(__min_func, _ComplexType);
+
+    _PSTL_PRAGMA_SIMD_REDUCTION(__min_func : __init)
+    for (_Size __i = 1; __i < __n; ++__i)
+    {
+        auto __min_val = __init.__min_val;
+        auto __max_val = __init.__max_val;
+        auto __current = __first + __i;
+        if (__comp(*__current, __min_val)) // strictly smaller: an earlier equal minimum is kept
+        {
+            __init.__min_val = *__current;
+            __init.__min_ind = __i;
+        }
+        else if (!__comp(*__current, __max_val)) // not smaller than the max: a later equal maximum replaces it
+        {
+            __init.__max_val = *__current;
+            __init.__max_ind = __i;
+        }
+    }
+    return std::make_pair(__first + __init.__min_ind, __first + __init.__max_ind);
+}
+
+template <class _InputIterator, class _DifferenceType, class _OutputIterator1, class _OutputIterator2,
+          class _UnaryPredicate>
+std::pair<_OutputIterator1, _OutputIterator2>
+__simd_partition_copy(_InputIterator __first, _DifferenceType __n, _OutputIterator1 __out_true,
+                      _OutputIterator2 __out_false, _UnaryPredicate __pred) noexcept // copies elements satisfying __pred to __out_true, all others to __out_false
+{
+    _DifferenceType __cnt_true = 0, __cnt_false = 0; // number of elements written to each output so far
+
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 0; __i < __n; ++__i)
+    {
+        _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC_2ARGS(__cnt_true : 1, __cnt_false : 1)
+        if (__pred(__first[__i]))
+        {
+            __out_true[__cnt_true] = __first[__i];
+            ++__cnt_true;
+        }
+        else
+        {
+            __out_false[__cnt_false] = __first[__i];
+            ++__cnt_false;
+        }
+    }
+    return std::make_pair(__out_true + __cnt_true, __out_false + __cnt_false); // one past the last element written to each output
+}
+
+template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
+_ForwardIterator1
+__simd_find_first_of(_ForwardIterator1 __first, _ForwardIterator1 __last, _ForwardIterator2 __s_first,
+                     _ForwardIterator2 __s_last, _BinaryPredicate __pred) noexcept
+{
+    typedef typename std::iterator_traits<_ForwardIterator1>::difference_type _DifferencType;
+
+    const _DifferencType __n1 = __last - __first;
+    const _DifferencType __n2 = __s_last - __s_first;
+    if (__n1 == 0 || __n2 == 0)
+    {
+        return __last; // according to the standard
+    }
+
+    // Common case
+    // If first sequence larger than second then we'll run simd_first with parameters of first sequence.
+    // Otherwise, vice versa.
+    if (__n1 < __n2)
+    {
+        for (; __first != __last; ++__first)
+        {
+            if (__unseq_backend::__simd_or(
+                    __s_first, __n2,
+                    __internal::__equal_value_by_pred<decltype(*__first), _BinaryPredicate>(*__first, __pred)))
+            {
+                return __first;
+            }
+        }
+    }
+    else
+    {
+        for (; __s_first != __s_last; ++__s_first)
+        {
+            const auto __result = __unseq_backend::__simd_first(
+                __first, _DifferencType(0), __n1, [__s_first, &__pred](_ForwardIterator1 __it, _DifferencType __i) {
+                    return __pred(__it[__i], *__s_first);
+                });
+            if (__result != __last)
+            {
+                return __result;
+            }
+        }
+    }
+    return __last;
+}
+
+template <class _RandomAccessIterator, class _DifferenceType, class _UnaryPredicate>
+_RandomAccessIterator
+__simd_remove_if(_RandomAccessIterator __first, _DifferenceType __n, _UnaryPredicate __pred) noexcept // compacts the kept elements to the front and returns the new logical end
+{
+    // Find the first element we need to remove (the first one for which __pred holds); elements before it stay in place.
+    auto __current = __unseq_backend::__simd_first(
+        __first, _DifferenceType(0), __n,
+        [&__pred](_RandomAccessIterator __it, _DifferenceType __i) { return __pred(__it[__i]); });
+    __n -= __current - __first; // __n is now the number of elements from __current to the end
+
+    // Fewer than two elements remain from __current on, so there is nothing to shift down; __current is already the result.
+    if (__n < 2)
+    {
+        return __current;
+    }
+
+    _DifferenceType __cnt = 0; // next free slot, as an offset from __current
+    _PSTL_PRAGMA_SIMD
+    for (_DifferenceType __i = 1; __i < __n; ++__i) // starts at 1: __current[0] is the element located above
+    {
+        _PSTL_PRAGMA_SIMD_ORDERED_MONOTONIC(__cnt : 1)
+        if (!__pred(__current[__i]))
+        {
+            __current[__cnt] = std::move(__current[__i]); // move the kept element down into the free slot
+            ++__cnt;
+        }
+    }
+    return __current + __cnt;
+}
+} // namespace __unseq_backend
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_UNSEQ_BACKEND_SIMD_H */

diff --git a/libcxx/include/pstl/internal/utils.h b/libcxx/include/pstl/internal/utils.h
new file mode 100644
index 0000000000000..ec9d46713a682
--- /dev/null
+++ b/libcxx/include/pstl/internal/utils.h
@@ -0,0 +1,177 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _PSTL_UTILS_H
+#define _PSTL_UTILS_H
+
+#include <new>
+#include <iterator>
+
+_PSTL_HIDE_FROM_ABI_PUSH
+
+namespace __pstl
+{
+namespace __internal
+{
+
+template <typename _Fp>
+auto
+__except_handler(_Fp __f) -> decltype(__f()) // runs __f and returns its result; only std::bad_alloc may escape
+{
+    try
+    {
+        return __f();
+    }
+    catch (const std::bad_alloc&)
+    {
+        throw; // re-throw bad_alloc according to the standard [algorithms.parallel.exceptions]
+    }
+    catch (...)
+    {
+        std::terminate(); // Good bye according to the standard [algorithms.parallel.exceptions]
+    }
+}
+
+template <typename _Fp>
+void
+__invoke_if(std::true_type, _Fp __f) // tag dispatch: this overload is chosen for std::true_type and calls __f
+{
+    __f();
+}
+
+template <typename _Fp>
+void __invoke_if(std::false_type, _Fp) // std::false_type overload: the callable is ignored
+{
+}
+
+template <typename _Fp>
+void
+__invoke_if_not(std::false_type, _Fp __f) // tag dispatch: this overload is chosen for std::false_type and calls __f
+{
+    __f();
+}
+
+template <typename _Fp>
+void __invoke_if_not(std::true_type, _Fp) // std::true_type overload: the callable is ignored
+{
+}
+
+template <typename _F1, typename _F2>
+auto
+__invoke_if_else(std::true_type, _F1 __f1, _F2) -> decltype(__f1()) // std::true_type: calls the first callable and returns its result
+{
+    return __f1();
+}
+
+template <typename _F1, typename _F2>
+auto
+__invoke_if_else(std::false_type, _F1, _F2 __f2) -> decltype(__f2()) // std::false_type: calls the second callable and returns its result
+{
+    return __f2();
+}
+
+//! Unary function object that forwards its argument back to the caller unchanged (identity).
+struct __no_op
+{
+    template <typename _Tp>
+    _Tp&&
+    operator()(_Tp&& __a) const
+    {
+        return std::forward<_Tp>(__a); // preserves the value category of the argument
+    }
+};
+
+template <typename _Pred>
+class __reorder_pred // adapts a binary predicate so that it is called with its two arguments swapped
+{
+    _Pred _M_pred; // the wrapped predicate, stored by value
+
+  public:
+    explicit __reorder_pred(_Pred __pred) : _M_pred(__pred) {}
+
+    template <typename _FTp, typename _STp>
+    bool
+    operator()(_FTp&& __a, _STp&& __b)
+    {
+        return _M_pred(std::forward<_STp>(__b), std::forward<_FTp>(__a)); // note the swapped order: (__b, __a)
+    }
+};
+
+//! Like a polymorphic lambda for pred(...,value): a unary predicate that evaluates pred(arg, value) for a fixed value.
+template <typename _Tp, typename _Predicate>
+class __equal_value_by_pred
+{
+    const _Tp& _M_value; // held by reference, not copied: the referenced value must outlive this object
+    _Predicate _M_pred;
+
+  public:
+    __equal_value_by_pred(const _Tp& __value, _Predicate __pred) : _M_value(__value), _M_pred(__pred) {}
+
+    template <typename _Arg>
+    bool
+    operator()(_Arg&& __arg)
+    {
+        return _M_pred(std::forward<_Arg>(__arg), _M_value); // the fixed value is always the second argument
+    }
+};
+
+//! Like a polymorphic lambda for ==value: a unary predicate that evaluates arg == value for a fixed value.
+template <typename _Tp>
+class __equal_value
+{
+    const _Tp& _M_value; // held by reference, not copied: the referenced value must outlive this object
+
+  public:
+    explicit __equal_value(const _Tp& __value) : _M_value(__value) {}
+
+    template <typename _Arg>
+    bool
+    operator()(_Arg&& __arg) const
+    {
+        return std::forward<_Arg>(__arg) == _M_value;
+    }
+};
+
+//! Logical negation of ==value: a unary predicate that evaluates !(arg == value) for a fixed value.
+template <typename _Tp>
+class __not_equal_value
+{
+    const _Tp& _M_value; // held by reference, not copied: the referenced value must outlive this object
+
+  public:
+    explicit __not_equal_value(const _Tp& __value) : _M_value(__value) {}
+
+    template <typename _Arg>
+    bool
+    operator()(_Arg&& __arg) const
+    {
+        return !(std::forward<_Arg>(__arg) == _M_value); // written via operator== only; operator!= is never used
+    }
+};
+
+template <typename _ForwardIterator, typename _Compare>
+_ForwardIterator
+__cmp_iterators_by_values(_ForwardIterator __a, _ForwardIterator __b, _Compare __comp) // returns the iterator whose value __comp orders first; on a tie, the one not ordered after the other by operator<
+{
+    if (__a < __b)
+    { // we should return closer iterator
+        return __comp(*__b, *__a) ? __b : __a; // __b wins only if its value is strictly ordered before *__a
+    }
+    else
+    {
+        return __comp(*__a, *__b) ? __a : __b; // __a wins only if its value is strictly ordered before *__b
+    }
+}
+
+} // namespace __internal
+} // namespace __pstl
+
+_PSTL_HIDE_FROM_ABI_POP
+
+#endif /* _PSTL_UTILS_H */

diff --git a/libcxx/test/libcxx/lint/lint_headers.sh.py b/libcxx/test/libcxx/lint/lint_headers.sh.py
index cb6054cd46e04..173f98b86ab7f 100644
--- a/libcxx/test/libcxx/lint/lint_headers.sh.py
+++ b/libcxx/test/libcxx/lint/lint_headers.sh.py
@@ -15,6 +15,7 @@ def exclude_from_consideration(path):
         os.path.basename(path) == '__config' or
         os.path.basename(path) == '__config_site.in' or
         os.path.basename(path) == 'libcxx.imp' or
+        os.path.basename(path).startswith('__pstl') or # TODO: Remove once PSTL integration is finished
         not os.path.isfile(path)
     )
 

diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp
index 00283594e8474..7d069251a05e8 100644
--- a/libcxx/test/libcxx/private_headers.verify.cpp
+++ b/libcxx/test/libcxx/private_headers.verify.cpp
@@ -25,6 +25,9 @@ for header in private_headers:
 
   # Skip the locale API headers, since they are platform-specific and thus inherently non-modular
   if 'locale_base_api' in header:
+
+  # TODO: Stop skipping PSTL headers once their integration is finished.
+  if header.startswith('__pstl'):
     continue
 
   print("{ifdef}#{indent}include <{header}> // {expected_error}@*:* {{{{use of private header from outside its module: '{header}'}}}}{endif}".format(

diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt
index 24cfc241fbd9f..044a395b352ce 100644
--- a/libcxx/utils/data/ignore_format.txt
+++ b/libcxx/utils/data/ignore_format.txt
@@ -506,6 +506,39 @@ libcxx/include/__numeric/transform_inclusive_scan.h
 libcxx/include/__numeric/transform_reduce.h
 libcxx/include/optional
 libcxx/include/ostream
+libcxx/include/pstl/internal/algorithm_fwd.h
+libcxx/include/pstl/internal/algorithm_impl.h
+libcxx/include/pstl/internal/execution_defs.h
+libcxx/include/pstl/internal/execution_impl.h
+libcxx/include/pstl/internal/glue_algorithm_defs.h
+libcxx/include/pstl/internal/glue_algorithm_impl.h
+libcxx/include/pstl/internal/glue_execution_defs.h
+libcxx/include/pstl/internal/glue_memory_defs.h
+libcxx/include/pstl/internal/glue_memory_impl.h
+libcxx/include/pstl/internal/glue_numeric_defs.h
+libcxx/include/pstl/internal/glue_numeric_impl.h
+libcxx/include/pstl/internal/memory_impl.h
+libcxx/include/pstl/internal/numeric_fwd.h
+libcxx/include/pstl/internal/numeric_impl.h
+libcxx/include/pstl/internal/omp/parallel_for_each.h
+libcxx/include/pstl/internal/omp/parallel_for.h
+libcxx/include/pstl/internal/omp/parallel_invoke.h
+libcxx/include/pstl/internal/omp/parallel_merge.h
+libcxx/include/pstl/internal/omp/parallel_reduce.h
+libcxx/include/pstl/internal/omp/parallel_scan.h
+libcxx/include/pstl/internal/omp/parallel_stable_partial_sort.h
+libcxx/include/pstl/internal/omp/parallel_stable_sort.h
+libcxx/include/pstl/internal/omp/parallel_transform_reduce.h
+libcxx/include/pstl/internal/omp/parallel_transform_scan.h
+libcxx/include/pstl/internal/omp/util.h
+libcxx/include/pstl/internal/parallel_backend.h
+libcxx/include/pstl/internal/parallel_backend_serial.h
+libcxx/include/pstl/internal/parallel_backend_tbb.h
+libcxx/include/pstl/internal/parallel_backend_utils.h
+libcxx/include/pstl/internal/parallel_impl.h
+libcxx/include/pstl/internal/pstl_config.h
+libcxx/include/pstl/internal/unseq_backend_simd.h
+libcxx/include/pstl/internal/utils.h
 libcxx/include/queue
 libcxx/include/__random/bernoulli_distribution.h
 libcxx/include/__random/binomial_distribution.h

diff --git a/libcxx/utils/generate_iwyu_mapping.py b/libcxx/utils/generate_iwyu_mapping.py
index c092bd6a53ae1..cb27d46773955 100644
--- a/libcxx/utils/generate_iwyu_mapping.py
+++ b/libcxx/utils/generate_iwyu_mapping.py
@@ -50,6 +50,11 @@ def generate_map(include):
         elif i == '__mbstate_t.h': continue
         elif i == '__mutex_base': continue
         elif i == '__node_handle': public = ['map', 'set', 'unordered_map', 'unordered_set']
+        elif i == '__pstl_algorithm': continue
+        elif i == '__pstl_config_site.in': continue
+        elif i == '__pstl_execution': continue
+        elif i == '__pstl_memory': continue
+        elif i == '__pstl_numeric': continue
         elif i == '__split_buffer': public = ['deque', 'vector']
         elif i == '__threading_support': public = ['atomic', 'mutex', 'semaphore', 'thread']
         elif i == '__tree': public = ['map', 'set']


        


More information about the libcxx-commits mailing list