[compiler-rt] [llvm] [compiler-rt] Initial support for building profile library on the GPU (PR #185552)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 08:16:23 PDT 2026
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/185552
>From dbb3fb3428f6a34c31c72410eb6f7364a605212b Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 9 Mar 2026 20:54:25 -0500
Subject: [PATCH 1/3] [compiler-rt] Initial support for building profile
library on the GPU
Summary:
As suggested in https://github.com/llvm/llvm-project/pull/177665, we
should build a GPU version of the compiler-rt profile library instead of
writing it in-line in the lowering. This PR does not define anything
GPU-specific; it simply re-uses the baremetal handling. Later PRs will
provide the GPU-specific handling we would want in order to optimize
counter handling on the GPU.
---
compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake | 4 +++-
compiler-rt/cmake/caches/GPU.cmake | 6 ++++--
compiler-rt/cmake/config-ix.cmake | 4 ++++
compiler-rt/lib/profile/CMakeLists.txt | 12 ++++++++++++
4 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index c2de0d0f652e8..c463771223f0c 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -1,3 +1,4 @@
+set(AMDGPU amdgcn)
set(ARM64 aarch64)
set(ARM32 arm armhf)
set(HEXAGON hexagon)
@@ -6,6 +7,7 @@ set(X86_64 x86_64)
set(LOONGARCH64 loongarch64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
+set(NVPTX nvptx64)
set(PPC32 powerpc powerpcspe)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
@@ -91,7 +93,7 @@ set(ALL_NSAN_SUPPORTED_ARCH ${X86_64})
set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64})
set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64})
set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64}
- ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
+ ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${AMDGPU} ${NVPTX}
${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32})
set(ALL_CTX_PROFILE_SUPPORTED_ARCH ${X86_64})
if (OS_NAME MATCHES "FreeBSD")
diff --git a/compiler-rt/cmake/caches/GPU.cmake b/compiler-rt/cmake/caches/GPU.cmake
index e448774cf1457..9feccccc5ce12 100644
--- a/compiler-rt/cmake/caches/GPU.cmake
+++ b/compiler-rt/cmake/caches/GPU.cmake
@@ -1,8 +1,9 @@
# This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
-# amdgcn and nvptx builds targeting the builtins library.
+# amdgcn and nvptx builds targeting the builtins and profile libraries.
set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
+set(COMPILER_RT_DEFAULT_TARGET_ONLY ON CACHE BOOL "")
set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
@@ -10,9 +11,10 @@ set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
-set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_PROFILE ON CACHE BOOL "")
set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
+set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 95cac0f8faa9f..d14a029e80dae 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -115,6 +115,10 @@ check_cxx_compiler_flag(--sysroot=. COMPILER_RT_HAS_SYSROOT_FLAG)
check_cxx_compiler_flag("-Werror -mcrc" COMPILER_RT_HAS_MCRC_FLAG)
check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT)
+check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
+check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
+check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
+check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
if(NOT WIN32 AND NOT CYGWIN)
# MinGW warns if -fvisibility-inlines-hidden is used.
diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt
index ca2b97a3169a9..108ca48b96c56 100644
--- a/compiler-rt/lib/profile/CMakeLists.txt
+++ b/compiler-rt/lib/profile/CMakeLists.txt
@@ -154,6 +154,18 @@ if(COMPILER_RT_PROFILE_BAREMETAL)
-DCOMPILER_RT_PROFILE_BAREMETAL=1)
endif()
+if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
+ append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
+ append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
+ append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS)
+ append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
+ -fconvergent-functions EXTRA_FLAGS)
+ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+ append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
+ "SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)
+ endif()
+endif()
+
if(MSVC)
# profile historically has only been supported with the static runtime
# on windows
>From 3a197c515b61623062a450dec39152627052732c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 10 Mar 2026 09:38:06 -0500
Subject: [PATCH 2/3] Cache file updates because this changes behavior
---
offload/cmake/caches/AMDGPULibcBot.cmake | 2 +-
offload/cmake/caches/FlangOffload.cmake | 4 ++--
offload/cmake/caches/Offload.cmake | 4 ++--
3 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/offload/cmake/caches/AMDGPULibcBot.cmake b/offload/cmake/caches/AMDGPULibcBot.cmake
index 798f080a41adf..ffd4f7c9b1ad2 100644
--- a/offload/cmake/caches/AMDGPULibcBot.cmake
+++ b/offload/cmake/caches/AMDGPULibcBot.cmake
@@ -17,6 +17,6 @@ set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "")
set(CLANG_DEFAULT_RTLIB "compiler-rt" STRING "")
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa CACHE STRING "")
-set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;openmp;libc;libcxxabi;libcxx" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_TEST_JOBS 4 CACHE STRING "")
diff --git a/offload/cmake/caches/FlangOffload.cmake b/offload/cmake/caches/FlangOffload.cmake
index ed75df6bb7e13..d8e1c92655747 100644
--- a/offload/cmake/caches/FlangOffload.cmake
+++ b/offload/cmake/caches/FlangOffload.cmake
@@ -3,8 +3,8 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;libcxx;libcxxabi;openmp
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
-set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
-set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "")
diff --git a/offload/cmake/caches/Offload.cmake b/offload/cmake/caches/Offload.cmake
index 3747a1d3eb299..14cc63a508482 100644
--- a/offload/cmake/caches/Offload.cmake
+++ b/offload/cmake/caches/Offload.cmake
@@ -3,7 +3,7 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;libunwind;libcxx;libcxxabi;openmp;offload"
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "")
-set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
-set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")
set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")
>From e5104b4cab283981f83b1a639b5df881b1009942 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 10 Mar 2026 10:16:12 -0500
Subject: [PATCH 3/3] noconvergentfunctions
---
compiler-rt/cmake/base-config-ix.cmake | 6 ++----
compiler-rt/cmake/builtin-config-ix.cmake | 1 -
compiler-rt/cmake/config-ix.cmake | 1 -
compiler-rt/lib/builtins/CMakeLists.txt | 2 --
compiler-rt/lib/profile/CMakeLists.txt | 2 --
5 files changed, 2 insertions(+), 10 deletions(-)
diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 37dfa5534dfef..5ab351d98964f 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -225,8 +225,7 @@ macro(test_targets)
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
- "-flto" "-fconvergent-functions"
- "-Xclang -mcode-object-version=none")
+ "-flto" "-Xclang -mcode-object-version=none")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "hexagon")
test_target_arch(hexagon "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
@@ -270,8 +269,7 @@ macro(test_targets)
test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
endif()
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
- test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto"
- "-fconvergent-functions" "-c")
+ test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto" "-c")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
if(WIN32)
test_target_arch(arm "" "" "")
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 569582a35e7ab..abaaeadf3bd06 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-c2y-extensions COMPILER_RT_HAS_WNO_C2Y_EXTE
builtin_check_c_compiler_flag(-Wno-pedantic COMPILER_RT_HAS_WNO_PEDANTIC)
builtin_check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
builtin_check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
-builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index d14a029e80dae..381e2e4b28abd 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -117,7 +117,6 @@ check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINI
check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT)
check_c_compiler_flag(-nogpulib COMPILER_RT_HAS_NOGPULIB_FLAG)
check_c_compiler_flag(-flto COMPILER_RT_HAS_FLTO_FLAG)
-check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
if(NOT WIN32 AND NOT CYGWIN)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 7ce929657eb82..6c27f6d4d529e 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -957,8 +957,6 @@ else ()
append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
- append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
- -fconvergent-functions BUILTIN_CFLAGS)
# AMDGPU targets want to use a generic ABI.
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt
index 108ca48b96c56..4cc2610cec870 100644
--- a/compiler-rt/lib/profile/CMakeLists.txt
+++ b/compiler-rt/lib/profile/CMakeLists.txt
@@ -158,8 +158,6 @@ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS)
- append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
- -fconvergent-functions EXTRA_FLAGS)
if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
"SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)
More information about the llvm-commits
mailing list