[libcxx-commits] [libcxx] [libcxx] Add testing configuration for GPU targets (PR #104515)

Thu Aug 15 15:07:50 PDT 2024

https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/104515

Summary:
The GPU runs these tests using the files built from the `libc` project.
These will be placed in `include/<triple>` and `lib/<triple>`. We use
the `amdhsa-loader` and `nvptx-loader` tools, which are also provided by
`libc`. These launch a kernel called `_start` which calls `main` so we
can pretend like GPU programs are normal terminal applications.

We force serial execution here, because `llvm-lit` runs way too many
processes in parallel, which has a bad habit of making the GPU drivers
hang or run out of resources. This allows the compilation to be run in
parallel while the jobs themselves are serialized via a file lock.

In the future this can likely be refined to accept user specified
architectures, or better handle including the root directory by exposing
that instead of just `include/<triple>/c++/v1/`.

This currently fails ~1% of the tests on AMDGPU and ~3% of the tests on
NVPTX. This will hopefully be reduced further, and later patches can
XFAIL a lot of them once it's down to a reasonable number.

Future support will likely want to allow passing in a custom
architecture instead of simply relying on `-mcpu=native`.


>From f4d18834da00713a2cdb1a5b3160f54964bf3fd0 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 17 Jul 2024 11:12:57 -0500
Subject: [PATCH 1/2] [libcxx] Add cache file for the GPU build

Summary:
This patch adds a CMake cache config file for the GPU build. This cache
will set the default required options when used from the LLVM runtime
interface or directly. These options pretty much disable everything the
GPU can't handle.

With this and the following patches: #99259, #99243, #99287, and #99333,
we will be able to build `libc++` targeting the GPU with an invocation
like this.

```
$ cmake ../llvm                                                               \
-C${LLVM_ROOT}/libcxx/cmake/caches/GPU.cmake                                  \
"-DRUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES=compiler-rt;libc;libcxx" \
"-DRUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES=compiler-rt;libc;libcxx"   \
"-DLLVM_RUNTIME_TARGETS=amdgcn-amd-amdhsa;nvptx64-nvidia-cuda"
```

This will then install the libraries and headers into the appropriate
locations for use with `clang`.

Move to separate files
---
 libcxx/cmake/caches/AMDGPU.cmake | 36 ++++++++++++++++++++++++++++++++
 libcxx/cmake/caches/NVPTX.cmake  | 36 ++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 libcxx/cmake/caches/AMDGPU.cmake
 create mode 100644 libcxx/cmake/caches/NVPTX.cmake

diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake
new file mode 100644
index 00000000000000..127f880d2fb44c
--- /dev/null
+++ b/libcxx/cmake/caches/AMDGPU.cmake
@@ -0,0 +1,36 @@
+# Configuration options for libcxx.
+set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
+set(LIBCXX_CXX_ABI none CACHE STRING "")
+set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
+set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RANDOM_DEVICE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_LOCALIZATION OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_UNICODE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_WIDE_CHARACTERS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXX_HAS_TERMINAL_AVAILABLE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RTTI OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_MONOTONIC_CLOCK ON CACHE BOOL "")
+set(LIBCXX_INSTALL_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_LIBC "llvm-libc" CACHE STRING "")
+set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
+set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "")
+
+# Configuration options for libcxxabi.
+set(LIBCXXABI_BAREMETAL ON CACHE BOOL "")
+set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
+set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "")
+
+# Necessary compile flags for AMDGPU.
+set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
+    "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
+set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
+    "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
+set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs" CACHE STRING "")
diff --git a/libcxx/cmake/caches/NVPTX.cmake b/libcxx/cmake/caches/NVPTX.cmake
new file mode 100644
index 00000000000000..f921bb2741b498
--- /dev/null
+++ b/libcxx/cmake/caches/NVPTX.cmake
@@ -0,0 +1,36 @@
+# Configuration options for libcxx.
+set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
+set(LIBCXX_CXX_ABI none CACHE STRING "")
+set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC ON CACHE BOOL "")
+set(LIBCXX_ENABLE_FILESYSTEM OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RANDOM_DEVICE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_LOCALIZATION OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_UNICODE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_WIDE_CHARACTERS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXX_HAS_TERMINAL_AVAILABLE OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_RTTI OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_STATICALLY_LINK_ABI_IN_STATIC_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXX_ENABLE_MONOTONIC_CLOCK ON CACHE BOOL "")
+set(LIBCXX_INSTALL_LIBRARY ON CACHE BOOL "")
+set(LIBCXX_LIBC "llvm-libc" CACHE STRING "")
+set(LIBCXX_USE_COMPILER_RT ON CACHE BOOL "")
+set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "")
+
+# Configuration options for libcxxabi.
+set(LIBCXXABI_BAREMETAL ON CACHE BOOL "")
+set(LIBCXXABI_ENABLE_SHARED OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_EXCEPTIONS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "")
+set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
+set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "")
+
+# Necessary compile flags for NVPTX.
+set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
+    "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
+set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
+    "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
+set(CMAKE_REQUIRED_FLAGS "-nogpulib -nodefaultlibs -flto -c" CACHE STRING "")

>From 0589aaa66da5afeccdc5c54c3b068a7b11e169aa Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 2 Aug 2024 10:43:06 -0500
Subject: [PATCH 2/2] [libcxx] Add testing configuration for GPU targets

Summary:
The GPU runs these tests using the files built from the `libc` project.
These will be placed in `include/<triple>` and `lib/<triple>`. We use
the `amdhsa-loader` and `nvptx-loader` tools, which are also provided by
`libc`. These launch a kernel called `_start` which calls `main` so we
can pretend like GPU programs are normal terminal applications.

We force serial execution here, because `llvm-lit` runs way too many
processes in parallel, which has a bad habit of making the GPU drivers
hang or run out of resources. This allows the compilation to be run in
parallel while the jobs themselves are serialized via a file lock.

In the future this can likely be refined to accept user specified
architectures, or better handle including the root directory by exposing
that instead of just `include/<triple>/c++/v1/`.

This currently fails ~1% of the tests on AMDGPU and ~3% of the tests on
NVPTX. This will hopefully be reduced further, and later patches can
XFAIL a lot of them once it's down to a reasonable number.

Future support will likely want to allow passing in a custom
architecture instead of simply relying on `-mcpu=native`.
---
 libcxx/cmake/caches/AMDGPU.cmake              |  4 +++
 libcxx/cmake/caches/NVPTX.cmake               |  4 +++
 .../test/configs/amdgpu-libc++-shared.cfg.in  | 29 +++++++++++++++++
 .../test/configs/nvptx-libc++-shared.cfg.in   | 31 +++++++++++++++++++
 .../deque.modifiers/insert_range.pass.cpp     |  4 +++
 .../replace_with_range.pass.cpp               |  4 +++
 6 files changed, 76 insertions(+)
 create mode 100644 libcxx/test/configs/amdgpu-libc++-shared.cfg.in
 create mode 100644 libcxx/test/configs/nvptx-libc++-shared.cfg.in

diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake
index 127f880d2fb44c..00549c69af00fb 100644
--- a/libcxx/cmake/caches/AMDGPU.cmake
+++ b/libcxx/cmake/caches/AMDGPU.cmake
@@ -28,6 +28,10 @@ set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "")
 set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
 set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "")
 
+# Test configuration.
+set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "long_tests=False;executor=amdhsa-loader" CACHE STRING "")
+
 # Necessary compile flags for AMDGPU.
 set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
     "-nogpulib;-flto;-fconvergent-functions;-Xclang;-mcode-object-version=none" CACHE STRING "")
diff --git a/libcxx/cmake/caches/NVPTX.cmake b/libcxx/cmake/caches/NVPTX.cmake
index f921bb2741b498..dae83940af5b04 100644
--- a/libcxx/cmake/caches/NVPTX.cmake
+++ b/libcxx/cmake/caches/NVPTX.cmake
@@ -28,6 +28,10 @@ set(LIBCXXABI_ENABLE_THREADS OFF CACHE BOOL "")
 set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "")
 set(LIBCXXABI_USE_LLVM_UNWINDER OFF CACHE BOOL "")
 
+# Test configuration.
+set(LIBCXX_TEST_CONFIG "nvptx-libc++-shared.cfg.in" CACHE STRING "")
+set(LIBCXX_TEST_PARAMS "long_tests=False;executor=nvptx-loader" CACHE STRING "")
+
 # Necessary compile flags for NVPTX.
 set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
     "-nogpulib;-flto;-fconvergent-functions;--cuda-feature=+ptx63" CACHE STRING "")
diff --git a/libcxx/test/configs/amdgpu-libc++-shared.cfg.in b/libcxx/test/configs/amdgpu-libc++-shared.cfg.in
new file mode 100644
index 00000000000000..9b37a81f8de5d4
--- /dev/null
+++ b/libcxx/test/configs/amdgpu-libc++-shared.cfg.in
@@ -0,0 +1,29 @@
+lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg')
+
+config.substitutions.append(('%{flags}',
+  f'--target={config.target_triple} -Wno-multi-gpu -flto -mcpu=native'))
+config.substitutions.append(('%{compile_flags}',
+    '-nogpulib -fno-builtin-printf -nogpuinc -nostdlibinc '
+    '-I %{include-dir} -I %{target-include-dir}/../../ '
+    '-I %{target-include-dir} -I %{libcxx-dir}/test/support'
+))
+config.substitutions.append(('%{link_flags}',
+  '-O1 -nostdinc++ -nostdlib++ %{lib-dir}/crt1.o '
+  '-L %{lib-dir} -lc++ -lc++abi -lclang_rt.builtins '
+))
+
+config.substitutions.append(('%{exec}',
+    '%{executor} --no-parallelism'
+))
+
+config.stdlib = 'llvm-libc++'
+
+import os, site
+site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
+import libcxx.test.params, libcxx.test.config
+libcxx.test.config.configure(
+    libcxx.test.params.DEFAULT_PARAMETERS,
+    libcxx.test.features.DEFAULT_FEATURES,
+    config,
+    lit_config
+)
diff --git a/libcxx/test/configs/nvptx-libc++-shared.cfg.in b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
new file mode 100644
index 00000000000000..26d93b29183f72
--- /dev/null
+++ b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
@@ -0,0 +1,31 @@
+lit_config.load_config(config, '@CMAKE_CURRENT_BINARY_DIR@/cmake-bridge.cfg')
+
+config.substitutions.append(('%{flags}',
+  f'--target={config.target_triple} -Wno-multi-gpu -flto -march=native'))
+config.substitutions.append(('%{compile_flags}',
+    '-nogpulib -fno-builtin-printf -nogpuinc -nostdlibinc '
+    '-I %{include-dir} -I %{target-include-dir}/../../ '
+    '-I %{target-include-dir} -I %{libcxx-dir}/test/support'
+))
+config.substitutions.append(('%{link_flags}',
+   '-nostdinc++ -nostdlib++ %{lib-dir}/crt1.o '
+   '-L %{lib-dir} -lc++ -lc++abi -lclang_rt.builtins '
+   '-Wl,--suppress-stack-size-warning '
+   '-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
+   '-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
+))
+config.substitutions.append(('%{exec}',
+    '%{executor} --no-parallelism'
+))
+
+config.stdlib = 'llvm-libc++'
+
+import os, site
+site.addsitedir(os.path.join('@LIBCXX_SOURCE_DIR@', 'utils'))
+import libcxx.test.params, libcxx.test.config
+libcxx.test.config.configure(
+    libcxx.test.params.DEFAULT_PARAMETERS,
+    libcxx.test.features.DEFAULT_FEATURES,
+    config,
+    lit_config
+)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
index a5f5455297ad44..b0218cb75aca93 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/insert_range.pass.cpp
@@ -6,6 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+// FIXME: This takes over an hour to compile, disable for now.
+// UNSUPPORTED: target=amdgcn-amd-amdhsa
+// UNSUPPORTED: target=nvptx64-nvidia-cuda
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
 // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME
 
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp
index 03e82590ed4ef6..d4b16b79a0b8dc 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_replace/replace_with_range.pass.cpp
@@ -6,6 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+// FIXME: This takes over an hour to compile, disable for now.
+// UNSUPPORTED: target=amdgcn-amd-amdhsa
+// UNSUPPORTED: target=nvptx64-nvidia-cuda
+
 // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20
 // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=10000000
 // ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=70000000