[libcxx-commits] [libcxx] [libcxxabi] [llvm] Adding Support for Offloading C++ standard algorithms (PR #116869)

Tue Nov 19 12:00:40 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-github-workflow

Author: Vedant Tewari (xevor11)

<details>
<summary>Changes</summary>



---

Patch is 371.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116869.diff


36 Files Affected:

- (modified) .github/workflows/libcxx-build-and-test.yaml (+1) 
- (modified) libcxx/CMakeLists.txt (+10-2) 
- (added) libcxx/cmake/caches/Generic-pstl-openmp.cmake (+1) 
- (modified) libcxx/docs/UserDocumentation.rst (+207-132) 
- (modified) libcxx/docs/VendorDocumentation.rst (+227-180) 
- (modified) libcxx/include/CMakeLists.txt (+1) 
- (modified) libcxx/include/__algorithm/ranges_find_last.h (+55) 
- (added) libcxx/include/__algorithm/ranges_find_last_if.h (+81) 
- (added) libcxx/include/__algorithm/ranges_find_last_if_not.h (+81) 
- (added) libcxx/include/__algorithm/ranges_shift_left.h (+74) 
- (added) libcxx/include/__algorithm/ranges_shift_right.h (+75) 
- (modified) libcxx/include/__config_site.in (+1) 
- (modified) libcxx/include/__pstl/backend.h (+14-14) 
- (modified) libcxx/include/__pstl/backend_fwd.h (+10-10) 
- (added) libcxx/include/__pstl/backends/openmp.h (+511) 
- (modified) libcxx/include/__pstl/dispatch.h (+15) 
- (modified) libcxx/include/module.modulemap (+2064-2236) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/fill_offload.pass.cpp (+52) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/find_if.pass.cpp (+67) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/find_if_funptr.pass.cpp (+36) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/find_if_offload.pass.cpp (+39) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/for_each_funptr.pass.cpp (+36) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/for_each_lambda.pass.cpp (+49) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/for_each_offload.pass.cpp (+39) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/for_each_overwrite_input.pass.cpp (+63) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/gpu_environment_variables.pass.cpp (+49) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/openmp_version_40.verify.cpp (+21) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/openmp_version_45.verify.cpp (+21) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/openmp_version_51.verify.cpp (+21) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/transform_offload.pass.cpp (+55) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/transform_reduce_offload.pass.cpp (+41) 
- (added) libcxx/test/libcxx/algorithms/alg.pstl.openmp/transform_reduce_supported_binary_operations.pass.cpp (+199) 
- (modified) libcxx/utils/ci/run-buildbot (+166-176) 
- (modified) libcxx/utils/libcxx/test/features.py (+104-178) 
- (modified) libcxx/utils/run.py (+15) 
- (modified) libcxxabi/CMakeLists.txt (+8) 


``````````diff

diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml
index 2184ddd49537b5..9e483612bc9943 100644
--- a/.github/workflows/libcxx-build-and-test.yaml
+++ b/.github/workflows/libcxx-build-and-test.yaml
@@ -158,6 +158,7 @@ jobs:
           'generic-no-wide-characters',
           'generic-no-rtti',
           'generic-optimized-speed',
+          'generic-pstl-openmp',
           'generic-static',
           'bootstrapping-build'
         ]
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index abe12c2805a7cf..dee2a75f74d89f 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -300,10 +300,11 @@ option(LIBCXX_HAS_EXTERNAL_THREAD_API
    This option may only be set to ON when LIBCXX_ENABLE_THREADS=ON." OFF)
 
 if (LIBCXX_ENABLE_THREADS)
-  set(LIBCXX_PSTL_BACKEND "std_thread" CACHE STRING "Which PSTL backend to use")
+  set(LIBCXX_PSTL_BACKEND_DEFAULT "std_thread")
 else()
-  set(LIBCXX_PSTL_BACKEND "serial" CACHE STRING "Which PSTL backend to use")
+  set(LIBCXX_PSTL_BACKEND_DEFAULT "serial")
 endif()
+set(LIBCXX_PSTL_BACKEND "${LIBCXX_PSTL_BACKEND_DEFAULT}" CACHE STRING "Select the PSTL backend to use. Valid values are serial, std-thread, libdispatch, openmp. Default: ${LIBCXX_PSTL_BACKEND_DEFAULT}")
 
 # Misc options ----------------------------------------------------------------
 # FIXME: Turn -pedantic back ON. It is currently off because it warns
@@ -552,6 +553,11 @@ function(cxx_add_basic_build_flags target)
     endif()
   endif()
   target_compile_options(${target} PUBLIC "${LIBCXX_ADDITIONAL_COMPILE_FLAGS}")
+
+  # If the PSTL backend depends on OpenMP, we must enable the OpenMP tool chain
+  if (LIBCXX_PSTL_BACKEND STREQUAL "openmp")
+    target_add_compile_flags_if_supported(${target} PUBLIC -fopenmp)
+  endif()
 endfunction()
 
 # Exception flags =============================================================
@@ -784,6 +790,8 @@ elseif(LIBCXX_PSTL_BACKEND STREQUAL "std_thread")
   config_define(1 _LIBCPP_PSTL_BACKEND_STD_THREAD)
 elseif(LIBCXX_PSTL_BACKEND STREQUAL "libdispatch")
   config_define(1 _LIBCPP_PSTL_BACKEND_LIBDISPATCH)
+elseif (LIBCXX_PSTL_BACKEND STREQUAL "openmp")
+  config_define(1 _LIBCPP_PSTL_BACKEND_OPENMP)
 else()
   message(FATAL_ERROR "LIBCXX_PSTL_BACKEND is set to ${LIBCXX_PSTL_BACKEND}, which is not a valid backend.
                        Valid backends are: serial, std_thread and libdispatch")
diff --git a/libcxx/cmake/caches/Generic-pstl-openmp.cmake b/libcxx/cmake/caches/Generic-pstl-openmp.cmake
new file mode 100644
index 00000000000000..f3ff4f3b57fd21
--- /dev/null
+++ b/libcxx/cmake/caches/Generic-pstl-openmp.cmake
@@ -0,0 +1 @@
+set(LIBCXX_PSTL_BACKEND openmp CACHE STRING "")
diff --git a/libcxx/docs/UserDocumentation.rst b/libcxx/docs/UserDocumentation.rst
index 2c1bc1373659c3..f1e7b19ead5798 100644
--- a/libcxx/docs/UserDocumentation.rst
+++ b/libcxx/docs/UserDocumentation.rst
@@ -1,17 +1,19 @@
-.. _user-documentation:
+.. _using-libcxx:
 
-==================
-User documentation
-==================
+============
+Using libc++
+============
 
 .. contents::
   :local:
 
+Usually, libc++ is packaged and shipped by a vendor through some delivery vehicle
+(operating system distribution, SDK, toolchain, etc) and users don't need to do
+anything special in order to use the library.
+
 This page contains information about configuration knobs that can be used by
 users when they know libc++ is used by their toolchain, and how to use libc++
-when it is not the default library used by their toolchain. It is aimed at
-users of libc++: a separate page contains documentation aimed at vendors who
-build and ship libc++ as part of their toolchain.
+when it is not the default library used by their toolchain.
 
 
 Using a different version of the C++ Standard
@@ -26,29 +28,10 @@ matches that Standard in the library.
 
   $ clang++ -std=c++17 test.cpp
 
-Note that using ``-std=c++XY`` with a version of the Standard that has not been ratified
-yet is considered unstable. While we strive to maintain stability, libc++ may be forced to
-make breaking changes to features shipped in a Standard that hasn't been ratified yet. Use
-these versions of the Standard at your own risk.
-
-
-Using libc++ when it is not the system default
-==============================================
-
-Usually, libc++ is packaged and shipped by a vendor through some delivery vehicle
-(operating system distribution, SDK, toolchain, etc) and users don't need to do
-anything special in order to use the library.
-
-On systems where libc++ is provided but is not the default, Clang provides a flag
-called ``-stdlib=`` that can be used to decide which standard library is used.
-Using ``-stdlib=libc++`` will select libc++:
-
-.. code-block:: bash
-
-  $ clang++ -stdlib=libc++ test.cpp
-
-On systems where libc++ is the library in use by default such as macOS and FreeBSD,
-this flag is not required.
+.. warning::
+  Using ``-std=c++XY`` with a version of the Standard that has not been ratified yet
+  is considered unstable. Libc++ reserves the right to make breaking changes to the
+  library until the standard has been ratified.
 
 
 Enabling experimental C++ Library features
@@ -60,19 +43,15 @@ the Standard but whose implementation is not complete or stable yet in libc++. T
 are disabled by default because they are neither API nor ABI stable. However, the
 ``-fexperimental-library`` compiler flag can be defined to turn those features on.
 
-On compilers that do not support the ``-fexperimental-library`` flag (such as GCC),
-users can define the ``_LIBCPP_ENABLE_EXPERIMENTAL`` macro and manually link against
-the appropriate static library (usually shipped as ``libc++experimental.a``) to get
-access to experimental library features.
-
 The following features are currently considered experimental and are only provided
 when ``-fexperimental-library`` is passed:
 
 * The parallel algorithms library (``<execution>`` and the associated algorithms)
+* ``std::stop_token``, ``std::stop_source`` and ``std::stop_callback``
+* ``std::jthread``
 * ``std::chrono::tzdb`` and related time zone functionality
-* ``<syncstream>``
 
-.. note::
+.. warning::
   Experimental libraries are experimental.
     * The contents of the ``<experimental/...>`` headers and the associated static
       library will not remain compatible between versions.
@@ -81,18 +60,98 @@ when ``-fexperimental-library`` is passed:
       the experimental feature is removed two releases after the non-experimental
       version has shipped. The full policy is explained :ref:`here <experimental features>`.
 
+.. note::
+  On compilers that do not support the ``-fexperimental-library`` flag, users can
+  define the ``_LIBCPP_ENABLE_EXPERIMENTAL`` macro and manually link against the
+  appropriate static library (usually shipped as ``libc++experimental.a``) to get
+  access to experimental library features.
 
-Libc++ Configuration Macros
+
+Using libc++ when it is not the system default
+==============================================
+
+On systems where libc++ is provided but is not the default, Clang provides a flag
+called ``-stdlib=`` that can be used to decide which standard library is used.
+Using ``-stdlib=libc++`` will select libc++:
+
+.. code-block:: bash
+
+  $ clang++ -stdlib=libc++ test.cpp
+
+On systems where libc++ is the library in use by default such as macOS and FreeBSD,
+this flag is not required.
+
+
+.. _alternate libcxx:
+
+Using a custom built libc++
 ===========================
 
-Libc++ provides a number of configuration macros that can be used by developers to
-enable or disable extended libc++ behavior.
+Most compilers provide a way to disable the default behavior for finding the
+standard library and to override it with custom paths. With Clang, this can
+be done with:
 
-.. warning::
-  Configuration macros that are not documented here are not intended to be customized
-  by developers and should not be used. In particular, some configuration macros are
-  only intended to be used by vendors and changing their value from the one provided
-  in your toolchain can lead to unexpected behavior.
+.. code-block:: bash
+
+  $ clang++ -nostdinc++ -nostdlib++           \
+            -isystem <install>/include/c++/v1 \
+            -L <install>/lib                  \
+            -Wl,-rpath,<install>/lib          \
+            -lc++                             \
+            test.cpp
+
+The option ``-Wl,-rpath,<install>/lib`` adds a runtime library search path,
+which causes the system's dynamic linker to look for libc++ in ``<install>/lib``
+whenever the program is loaded.
+
+GCC does not support the ``-nostdlib++`` flag, so one must use ``-nodefaultlibs``
+instead. Since that removes all the standard system libraries and not just libc++,
+the system libraries must be re-added manually. For example:
+
+.. code-block:: bash
+
+  $ g++ -nostdinc++ -nodefaultlibs           \
+        -isystem <install>/include/c++/v1    \
+        -L <install>/lib                     \
+        -Wl,-rpath,<install>/lib             \
+        -lc++ -lc++abi -lm -lc -lgcc_s -lgcc \
+        test.cpp
+
+
+GDB Pretty printers for libc++
+==============================
+
+GDB does not support pretty-printing of libc++ symbols by default. However, libc++ does
+provide pretty-printers itself. Those can be used as:
+
+.. code-block:: bash
+
+  $ gdb -ex "source <libcxx>/utils/gdb/libcxx/printers.py" \
+        -ex "python register_libcxx_printer_loader()" \
+        <args>
+
+.. _include-what-you-use:
+
+include-what-you-use (IWYU)
+===========================
+
+libc++ provides an IWYU `mapping file <https://github.com/include-what-you-use/include-what-you-use/blob/master/docs/IWYUMappings.md>`_,
+which drastically improves the accuracy of the tool when using libc++. To use the mapping file with
+IWYU, you should run the tool like so:
+
+.. code-block:: bash
+
+  $ include-what-you-use -Xiwyu --mapping_file=/path/to/libcxx/include/libcxx.imp file.cpp
+
+If you would prefer to not use that flag, then you can replace ``/path/to/include-what-you-use/share/libcxx.imp``
+file with the libc++-provided ``libcxx.imp`` file.
+
+Libc++ Configuration Macros
+===========================
+
+Libc++ provides a number of configuration macros which can be used to enable
+or disable extended libc++ behavior, including enabling hardening or thread
+safety annotations.
 
 **_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS**:
   This macro is used to enable -Wthread-safety annotations on libc++'s
@@ -134,12 +193,6 @@ enable or disable extended libc++ behavior.
   warning saying that `std::auto_ptr` is deprecated. If the macro is defined,
   no warning will be emitted. By default, this macro is not defined.
 
-**_LIBCPP_ENABLE_EXPERIMENTAL**:
-  This macro enables experimental features. This can be used on compilers that do
-  not support the ``-fexperimental-library`` flag. When used, users also need to
-  ensure that the appropriate experimental library (usually ``libc++experimental.a``)
-  is linked into their program.
-
 C++17 Specific Configuration Macros
 -----------------------------------
 **_LIBCPP_ENABLE_CXX17_REMOVED_AUTO_PTR**:
@@ -156,18 +209,12 @@ C++17 Specific Configuration Macros
 **_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE**:
   This macro is used to re-enable the `random_shuffle` algorithm.
 
-**_LIBCPP_ENABLE_CXX17_REMOVED_UNARY_BINARY_FUNCTION**:
-  This macro is used to re-enable `unary_function` and `binary_function`.
-
 **_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS**:
   This macro is used to re-enable `set_unexpected`, `get_unexpected`, and
   `unexpected`.
 
 C++20 Specific Configuration Macros
 -----------------------------------
-**_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION**:
-  This macro is used to re-enable `uncaught_exception`.
-
 **_LIBCPP_ENABLE_CXX20_REMOVED_SHARED_PTR_UNIQUE**:
   This macro is used to re-enable the function
   ``std::shared_ptr<...>::unique()``.
@@ -184,9 +231,6 @@ C++20 Specific Configuration Macros
 **_LIBCPP_ENABLE_CXX20_REMOVED_RAW_STORAGE_ITERATOR**:
   This macro is used to re-enable `raw_storage_iterator`.
 
-**_LIBCPP_ENABLE_CXX20_REMOVED_TEMPORARY_BUFFER**:
-  This macro is used to re-enable `get_temporary_buffer` and `return_temporary_buffer`.
-
 **_LIBCPP_ENABLE_CXX20_REMOVED_TYPE_TRAITS**:
   This macro is used to re-enable `is_literal_type`, `is_literal_type_v`,
   `result_of` and `result_of_t`.
@@ -263,7 +307,7 @@ Extensions to the C++23 modules ``std`` and ``std.compat``
 ----------------------------------------------------------
 
 Like other major implementations, libc++ provides C++23 modules ``std`` and
-``std.compat`` in C++20 as an extension.
+``std.compat`` in C++20 as an extension"
 
 Constant-initialized std::string
 --------------------------------
@@ -320,14 +364,109 @@ Unpoisoning may not be an option, if (for example) you are not maintaining the a
 * You are using allocator, which does not call destructor during deallocation.
 * You are aware that memory allocated with an allocator may be accessed, even when unused by container.
 
-Support for compiler extensions
--------------------------------
+Offloading C++ Parallel Algorithms to GPUs
+------------------------------------------
+
+Experimental support for GPU offloading has been added to ``libc++``. The
+implementation uses OpenMP target offloading to leverage GPU compute resources.
+The OpenMP PSTL backend can target both NVIDIA and AMD GPUs.
+However, the implementation only supports contiguous iterators, such as
+iterators for ``std::vector`` or ``std::array``.
+To enable the OpenMP offloading backend it must be selected with
+``LIBCXX_PSTL_BACKEND=openmp`` when installing ``libc++``. Further, when
+compiling a program, the user must specify the command line options
+``-fopenmp -fexperimental-library``. To install LLVM with OpenMP offloading
+enabled, please read
+`the LLVM OpenMP FAQ. <https://openmp.llvm.org/SupportAndFAQ.html>`_
+You may also want to to visit
+`the OpenMP offloading command-line argument reference. <https://openmp.llvm.org/CommandLineArgumentReference.html#offload-command-line-arguments>`_
+
+Example
+~~~~~~~
+
+The following is an example of offloading vector addition to a GPU using our
+standard library extension. It implements the classical vector addition from
+BLAS that overwrites the vector ``y`` with ``y=a*x+y``. Thus ``y.begin()`` is
+both used as an input and an output iterator in this example.
+
+.. code-block:: cpp
+
+  #include <algorithm>
+  #include <execution>
+
+  template <typename T1, typename T2, typename T3>
+  void axpy(const T1 a, const std::vector<T2> &x, std::vector<T3> &y) {
+    std::transform(std::execution::par_unseq, x.begin(), x.end(), y.begin(),
+                  y.begin(), [=](T2 xi, T3 yi) { return a * xi + yi; });
+  }
 
-Clang, GCC and other compilers all provide their own set of language extensions. These extensions
-have often been developed without particular consideration for their interaction with the library,
-and as such, libc++ does not go out of its way to support them. The library may support specific
-compiler extensions which would then be documented explicitly, but the basic expectation should be
-that no special support is provided for arbitrary compiler extensions.
+The execution policy ``std::execution::par_unseq`` states that the algorithm's
+execution may be parallelized, vectorized, and migrated across threads. This is
+the only execution mode that is safe to offload to GPUs, and for all other
+execution modes the algorithms will execute on the CPU.
+Special attention must be paid to the lambda captures when enabling GPU
+offloading. If the lambda captures by reference, the user must manually map the
+variables to the device. If capturing by reference, the above example could
+be implemented in the following way.
+
+.. code-block:: cpp
+
+  template <typename T1, typename T2, typename T3>
+  void axpy(const T1 a, const std::vector<T2> &x, std::vector<T3> &y) {
+  #pragma omp target data map(to : a)
+    std::transform(std::execution::par_unseq, x.begin(), x.end(), y.begin(),
+                  y.begin(), [&](T2 xi, T3 yi) { return a * xi + yi; });
+  }
+
+However, if unified shared memory, USM, is enabled, no additional data mapping
+is necessary when capturing y reference.
+
+Compiling functions for GPUs with OpenMP
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The C++ standard defines that all accesses to memory are inside a single address
+space. However, discrete GPU systems have distinct address spaces. A single
+address space can be emulated if your system supports unified shared memory.
+However, many discrete GPU systems do not, and in those cases it is important to
+pass device function pointers to the parallel algorithms. Below is an example of
+how the OpenMP ``declare target`` directive with the ``indirect`` clause can be
+used to mark that a function should be compiled for both host and device.
+
+.. code-block:: cpp
+
+  // This function computes the squared difference of two floating points
+  float squared(float a, float b) { return a * a - 2.0f * a * b + b * b; };
+
+  // Declare that the function must be compiled for both host and device
+  #pragma omp declare target indirect to(squared)
+
+  int main() {
+    std::vector<float> a(100, 1.0);
+    std::vector<float> b(100, 1.25);
+
+    // Pass the host function pointer to the parallel algorithm and let OpenMP
+    // translate it to the device function pointer internally
+    float sum =
+        std::transform_reduce(std::execution::par_unseq, a.begin(), a.end(),
+                              b.begin(), 0.0f, std::plus{}, squared);
+
+    // Validate that the result is approximately 6.25
+    assert(std::abs(sum - 6.25f) < 1e-10);
+    return 0;
+  }
+
+Without unified shared memory, the above example will not work if the host
+function pointer ``squared`` is passed to the parallel algorithm.
+
+Important notes about exception handling
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+GPU architectures do not support exception handling. If compiling a program
+containing parallel algorithms with current versions of Clang, a program with
+exceptions in offloaded code regions will compile, but the program will
+terminate if an exception is thrown on the device. This does not conform with
+the C++ standard and exception handling on GPUs will hopefully be better
+supported in future releases of LLVM.
 
 Platform specific behavior
 ==========================
@@ -351,67 +490,3 @@ specific locale is imbued, the IO with the underlying stream happens with
 regular ``char`` elements, which are converted to/from wide characters
 according to the locale. Note that this doesn't behave as expected if the
 stream has been set in Unicode mode.
-
-
-Third-party Integrations
-========================
-
-Libc++ provides integration with a few third-party tools.
-
-Debugging libc++ internals in LLDB
-----------------------------------
-
-LLDB hides the implementation details of libc++ by default.
-
-E.g., when setting a breakpoint in a comparator passed to ``std::sort``, the
-backtrace will read as
-
-.. code-block::
-
-  (lldb) thread backtrace
-  * thread #1, name = 'a.out', stop reason = breakpoint 3.1
-    * frame #0: 0x000055555555520e a.out`my_comparator(a=1, b=8) at test-std-sort.cpp:6:3
-      frame #7: 0x0000555555555615 a.out`void std::__1::sort[abi:ne200000]<std::__1::__wrap_iter<int*>, bool (*)(int, int)>(__first=(item = 8), __last=(item = 0), __comp=(a.out`my_less(int, int) at test-std-sort.cpp:5)) at sort.h:1003:3
-      frame #8: 0x000055555555531a a.out`main at test-std-sort.cpp:24:3
-
-Note how the caller of ``my_comparator`` is shown as ``std::sort``. Looking at
-the frame numbers, we can see that frames #1 until #6 were hidden. Those frames
-represent internal implementation details such as ``__sort4`` and similar
-utility functions.
-
-To also show those implementation details, use ``thread backtrace -u``.
-Alternatively, to disable those compact backtraces, use ``frame recognizer list``
-and ``frame recognizer disable`` on the "libc++ frame recognizer".
-
-Futhermore, stepping into libc++ functions is disabled by default. This is controlled via the
-setting ``target.process.thread.step-avoid-regexp`` which defaults to ``^std::`` and can be
-disabled using ``settings set target.process.thread.step-avoid-regexp ""``.
-
-GDB ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/116869