[compiler-rt] [llvm] [copmiler-rt] Initial support for building profile library on the GPU (PR #185552)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 08:16:23 PDT 2026


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/185552

>From dbb3fb3428f6a34c31c72410eb6f7364a605212b Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 9 Mar 2026 20:54:25 -0500
Subject: [PATCH 1/3] [copmiler-rt] Initial support for building profile
 library on the GPU

Summary:
As suggested in https://github.com/llvm/llvm-project/pull/177665, we
should build a GPU version of the compiler-rt profile library instead of
writing it in-line in the lowering. This PR does not define anything GPU
specific, it simply re-uses the baremetal handling. Later PRs will
prevent the GPU specific handling we would want to do to optimize
counter handling on the GPU.
---
 compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake |  4 +++-
 compiler-rt/cmake/caches/GPU.cmake                   |  6 ++++--
 compiler-rt/cmake/config-ix.cmake                    |  4 ++++
 compiler-rt/lib/profile/CMakeLists.txt               | 12 ++++++++++++
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
index c2de0d0f652e8..c463771223f0c 100644
--- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
+++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake
@@ -1,3 +1,4 @@
+set(AMDGPU amdgcn)
 set(ARM64 aarch64)
 set(ARM32 arm armhf)
 set(HEXAGON hexagon)
@@ -6,6 +7,7 @@ set(X86_64 x86_64)
 set(LOONGARCH64 loongarch64)
 set(MIPS32 mips mipsel)
 set(MIPS64 mips64 mips64el)
+set(NVPTX nvptx64)
 set(PPC32 powerpc powerpcspe)
 set(PPC64 powerpc64 powerpc64le)
 set(RISCV32 riscv32)
@@ -91,7 +93,7 @@ set(ALL_NSAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64} ${RISCV64})
 set(ALL_MEMPROF_SUPPORTED_ARCH ${X86_64})
 set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC32} ${PPC64}
-    ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON}
+    ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9} ${HEXAGON} ${AMDGPU} ${NVPTX}
     ${RISCV32} ${RISCV64} ${LOONGARCH64} ${WASM32})
 set(ALL_CTX_PROFILE_SUPPORTED_ARCH ${X86_64})
 if (OS_NAME MATCHES "FreeBSD")
diff --git a/compiler-rt/cmake/caches/GPU.cmake b/compiler-rt/cmake/caches/GPU.cmake
index e448774cf1457..9feccccc5ce12 100644
--- a/compiler-rt/cmake/caches/GPU.cmake
+++ b/compiler-rt/cmake/caches/GPU.cmake
@@ -1,8 +1,9 @@
 # This file sets up a CMakeCache for GPU builds of compiler-rt. This supports
-# amdgcn and nvptx builds targeting the builtins library.
+# amdgcn and nvptx builds targeting the builtins and profile libraries.
 
 set(COMPILER_RT_INCLUDE_TESTS OFF CACHE BOOL "")
 set(COMPILER_RT_HAS_SAFESTACK OFF CACHE BOOL "")
+set(COMPILER_RT_DEFAULT_TARGET_ONLY ON CACHE BOOL "")
 
 set(COMPILER_RT_BUILD_BUILTINS ON CACHE BOOL "")
 set(COMPILER_RT_BAREMETAL_BUILD ON CACHE BOOL "")
@@ -10,9 +11,10 @@ set(COMPILER_RT_BUILD_CRT OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SANITIZERS OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_XRAY OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_LIBFUZZER OFF CACHE BOOL "")
-set(COMPILER_RT_BUILD_PROFILE OFF CACHE BOOL "")
+set(COMPILER_RT_BUILD_PROFILE ON CACHE BOOL "")
 set(COMPILER_RT_BUILD_MEMPROF OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_XRAY_NO_PREINIT OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_ORC OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_GWP_ASAN OFF CACHE BOOL "")
 set(COMPILER_RT_BUILD_SCUDO_SANTDALONE_WITH_LLVM_LIBC OFF CACHE BOOL "")
+set(COMPILER_RT_PROFILE_BAREMETAL ON CACHE BOOL "")
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 95cac0f8faa9f..d14a029e80dae 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -115,6 +115,10 @@ check_cxx_compiler_flag(--sysroot=.          COMPILER_RT_HAS_SYSROOT_FLAG)
 check_cxx_compiler_flag("-Werror -mcrc"      COMPILER_RT_HAS_MCRC_FLAG)
 check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
 check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT)
+check_c_compiler_flag(-nogpulib             COMPILER_RT_HAS_NOGPULIB_FLAG)
+check_c_compiler_flag(-flto                 COMPILER_RT_HAS_FLTO_FLAG)
+check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
+check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
 
 if(NOT WIN32 AND NOT CYGWIN)
   # MinGW warns if -fvisibility-inlines-hidden is used.
diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt
index ca2b97a3169a9..108ca48b96c56 100644
--- a/compiler-rt/lib/profile/CMakeLists.txt
+++ b/compiler-rt/lib/profile/CMakeLists.txt
@@ -154,6 +154,18 @@ if(COMPILER_RT_PROFILE_BAREMETAL)
      -DCOMPILER_RT_PROFILE_BAREMETAL=1)
 endif()
 
+if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
+  append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
+  append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
+  append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS)
+  append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
+                 -fconvergent-functions EXTRA_FLAGS)
+  if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
+    append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
+                   "SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)
+  endif()
+endif()
+
 if(MSVC)
   # profile historically has only been supported with the static runtime
   # on windows

>From 3a197c515b61623062a450dec39152627052732c Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 10 Mar 2026 09:38:06 -0500
Subject: [PATCH 2/3] Cache file updates because this changes behavior

---
 offload/cmake/caches/AMDGPULibcBot.cmake | 2 +-
 offload/cmake/caches/FlangOffload.cmake  | 4 ++--
 offload/cmake/caches/Offload.cmake       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/offload/cmake/caches/AMDGPULibcBot.cmake b/offload/cmake/caches/AMDGPULibcBot.cmake
index 798f080a41adf..ffd4f7c9b1ad2 100644
--- a/offload/cmake/caches/AMDGPULibcBot.cmake
+++ b/offload/cmake/caches/AMDGPULibcBot.cmake
@@ -17,6 +17,6 @@ set(CLANG_DEFAULT_LINKER "lld" CACHE STRING "")
 set(CLANG_DEFAULT_RTLIB "compiler-rt" STRING "")
 
 set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa CACHE STRING "")
-set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
 set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;openmp;libc;libcxxabi;libcxx" CACHE STRING "")
 set(RUNTIMES_amdgcn-amd-amdhsa_LIBC_GPU_TEST_JOBS 4 CACHE STRING "")
diff --git a/offload/cmake/caches/FlangOffload.cmake b/offload/cmake/caches/FlangOffload.cmake
index ed75df6bb7e13..d8e1c92655747 100644
--- a/offload/cmake/caches/FlangOffload.cmake
+++ b/offload/cmake/caches/FlangOffload.cmake
@@ -3,8 +3,8 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;libcxx;libcxxabi;openmp
 set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
 
 set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") 
-set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
-set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
 set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
 set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi;flang-rt" CACHE STRING "")
 set(RUNTIMES_nvptx64-nvidia-cuda_FLANG_RT_LIBC_PROVIDER llvm CACHE STRING "")
diff --git a/offload/cmake/caches/Offload.cmake b/offload/cmake/caches/Offload.cmake
index 3747a1d3eb299..14cc63a508482 100644
--- a/offload/cmake/caches/Offload.cmake
+++ b/offload/cmake/caches/Offload.cmake
@@ -3,7 +3,7 @@ set(LLVM_ENABLE_RUNTIMES "compiler-rt;libunwind;libcxx;libcxxabi;openmp;offload"
 set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR ON CACHE BOOL "")
 
 set(LLVM_RUNTIME_TARGETS default;amdgcn-amd-amdhsa;nvptx64-nvidia-cuda CACHE STRING "") 
-set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
-set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
+set(RUNTIMES_nvptx64-nvidia-cuda_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/NVPTX.cmake" CACHE STRING "")
+set(RUNTIMES_amdgcn-amd-amdhsa_CACHE_FILES "${CMAKE_SOURCE_DIR}/../compiler-rt/cmake/caches/GPU.cmake;${CMAKE_SOURCE_DIR}/../libcxx/cmake/caches/AMDGPU.cmake" CACHE STRING "")
 set(RUNTIMES_nvptx64-nvidia-cuda_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")
 set(RUNTIMES_amdgcn-amd-amdhsa_LLVM_ENABLE_RUNTIMES "compiler-rt;libc;openmp;libcxx;libcxxabi" CACHE STRING "")

>From e5104b4cab283981f83b1a639b5df881b1009942 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Tue, 10 Mar 2026 10:16:12 -0500
Subject: [PATCH 3/3] noconvergentfunctions

---
 compiler-rt/cmake/base-config-ix.cmake    | 6 ++----
 compiler-rt/cmake/builtin-config-ix.cmake | 1 -
 compiler-rt/cmake/config-ix.cmake         | 1 -
 compiler-rt/lib/builtins/CMakeLists.txt   | 2 --
 compiler-rt/lib/profile/CMakeLists.txt    | 2 --
 5 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/compiler-rt/cmake/base-config-ix.cmake b/compiler-rt/cmake/base-config-ix.cmake
index 37dfa5534dfef..5ab351d98964f 100644
--- a/compiler-rt/cmake/base-config-ix.cmake
+++ b/compiler-rt/cmake/base-config-ix.cmake
@@ -225,8 +225,7 @@ macro(test_targets)
       endif()
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
       test_target_arch(amdgcn "" "--target=amdgcn-amd-amdhsa" "-nogpulib"
-                       "-flto" "-fconvergent-functions"
-                       "-Xclang -mcode-object-version=none")
+                       "-flto" "-Xclang -mcode-object-version=none")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "hexagon")
       test_target_arch(hexagon "" "")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "loongarch64")
@@ -270,8 +269,7 @@ macro(test_targets)
         test_target_arch(mips64 "" "-mips64r2" "-mabi=64")
       endif()
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "nvptx")
-      test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto"
-                       "-fconvergent-functions" "-c")
+      test_target_arch(nvptx64 "" "--nvptx64-nvidia-cuda" "-nogpulib" "-flto" "-c")
     elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "arm")
       if(WIN32)
         test_target_arch(arm "" "" "")
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index 569582a35e7ab..abaaeadf3bd06 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-c2y-extensions   COMPILER_RT_HAS_WNO_C2Y_EXTE
 builtin_check_c_compiler_flag(-Wno-pedantic         COMPILER_RT_HAS_WNO_PEDANTIC)
 builtin_check_c_compiler_flag(-nogpulib             COMPILER_RT_HAS_NOGPULIB_FLAG)
 builtin_check_c_compiler_flag(-flto                 COMPILER_RT_HAS_FLTO_FLAG)
-builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
 builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
 builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
 builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index d14a029e80dae..381e2e4b28abd 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -117,7 +117,6 @@ check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINI
 check_cxx_compiler_flag("-Werror -ftrivial-auto-var-init=pattern" COMPILER_RT_HAS_TRIVIAL_AUTO_INIT)
 check_c_compiler_flag(-nogpulib             COMPILER_RT_HAS_NOGPULIB_FLAG)
 check_c_compiler_flag(-flto                 COMPILER_RT_HAS_FLTO_FLAG)
-check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
 check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
 
 if(NOT WIN32 AND NOT CYGWIN)
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 7ce929657eb82..6c27f6d4d529e 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -957,8 +957,6 @@ else ()
     append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding BUILTIN_CFLAGS)
     append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib BUILTIN_CFLAGS)
     append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
-    append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
-                   -fconvergent-functions BUILTIN_CFLAGS)
 
     # AMDGPU targets want to use a generic ABI.
     if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt
index 108ca48b96c56..4cc2610cec870 100644
--- a/compiler-rt/lib/profile/CMakeLists.txt
+++ b/compiler-rt/lib/profile/CMakeLists.txt
@@ -158,8 +158,6 @@ if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn|nvptx")
   append_list_if(COMPILER_RT_HAS_FFREESTANDING_FLAG -ffreestanding EXTRA_FLAGS)
   append_list_if(COMPILER_RT_HAS_NOGPULIB_FLAG -nogpulib EXTRA_FLAGS)
   append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto EXTRA_FLAGS)
-  append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
-                 -fconvergent-functions EXTRA_FLAGS)
   if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
     append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
                    "SHELL:-Xclang -mcode-object-version=none" EXTRA_FLAGS)



More information about the llvm-commits mailing list