[llvm] [Offload] Rework compiling device code for unit test suites (PR #144776)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 08:24:38 PDT 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/144776
>From fffb54b128b77e7b9ba6bc36cf7112dfb60a8c98 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 18 Jun 2025 13:30:10 -0500
Subject: [PATCH 1/3] [Offload] Rework compiling device code for unit test
suites
Summary:
I'll probably want to use this as a more generic utility in the future.
This patch reworks it to make it a top level function. I also tried to
decouple this from the OpenMP utilities to make that easier in the
future. Instead, I just use `-march=native` functionality which is the
same thing. Needed a small hack to skip the linker stage for checking if
that works.
This should still create the same output as far as I'm aware.
---
offload/unittests/CMakeLists.txt | 73 ++++++++++++++++++-
.../OffloadAPI/device_code/CMakeLists.txt | 69 +-----------------
2 files changed, 74 insertions(+), 68 deletions(-)
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 985dd892d8046..4670565ab634d 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -1,6 +1,72 @@
add_custom_target(OffloadUnitTests)
set_target_properties(OffloadUnitTests PROPERTIES FOLDER "Tests/UnitTests")
+function(add_offload_test_device_code test_filename test_name)
+ set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
+ set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
+
+ # Try to build with support for NVPTX devices.
+ if("cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+ find_package(CUDAToolkit QUIET)
+ if(CUDAToolkit_FOUND)
+ get_filename_component(cuda_path "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
+ endif()
+ check_cxx_compiler_flag(
+ "--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
+
+ if(PLATFORM_HAS_NVPTX)
+ set(nvptx_arch "native")
+ elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+ set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+ endif()
+
+ if(nvptx_arch AND CUDAToolkit_FOUND)
+ set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND ${CMAKE_C_COMPILER}
+ --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
+ -nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
+ -c ${SRC_PATH} -o ${output_file}
+ DEPENDS ${SRC_PATH}
+ )
+ add_custom_target(${test_name}.nvptx64 DEPENDS ${output_file})
+ endif()
+ endif()
+
+ # Try to build with support for AMDGPU devices.
+ if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
+ check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
+
+ if(PLATFORM_HAS_AMDGPU)
+ set(amdgpu_arch "native")
+ elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+ set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+ endif()
+
+ if(amdgpu_arch)
+ set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
+ add_custom_command(
+ OUTPUT ${output_file}
+ COMMAND ${CMAKE_C_COMPILER}
+ --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
+ -nogpulib -flto ${ARGN} -c ${SRC_PATH} -o ${output_file}
+ DEPENDS ${SRC_PATH}
+ )
+ add_custom_target(${test_name}.amdgpu DEPENDS ${output_file})
+ endif()
+ endif()
+
+ # Create a single dependency target for the device code.
+ add_custom_target(${test_name}.bin)
+ if(TARGET ${test_name}.amdgpu)
+ add_dependencies(${test_name}.bin ${test_name}.amdgpu)
+ endif()
+ if(TARGET ${test_name}.nvptx64)
+ add_dependencies(${test_name}.bin ${test_name}.nvptx64)
+ endif()
+endfunction()
+
function(add_offload_unittest test_dirname)
set(target_name "${test_dirname}.unittests")
@@ -9,10 +75,15 @@ function(add_offload_unittest test_dirname)
add_unittest(OffloadUnitTests "${target_name}"
${CMAKE_CURRENT_SOURCE_DIR}/common/Environment.cpp
${files})
- add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} OffloadUnitTestsDeviceBins)
+ add_dependencies(${target_name} ${PLUGINS_TEST_COMMON} offload_device_binaries)
target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}")
target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON})
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
endfunction()
+set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+ "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
+set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
+ "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
+
add_subdirectory(OffloadAPI)
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index c2e4d0cb24e6e..132c7a7c51fb8 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,72 +1,7 @@
-macro(add_offload_test_device_code test_filename test_name)
- set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
-
- # Build for NVPTX
- if(OFFLOAD_TEST_TARGET_NVIDIA)
- set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin)
- add_custom_command(OUTPUT ${BIN_PATH}
- COMMAND
- ${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
- ${ARGN}
- -march=${LIBOMPTARGET_DEP_CUDA_ARCH}
- --cuda-path=${CUDA_ROOT}
- ${SRC_PATH} -o ${BIN_PATH}
- DEPENDS ${SRC_PATH}
- )
- list(APPEND BIN_PATHS ${BIN_PATH})
- endif()
-
- # Build for AMDGPU
- if(OFFLOAD_TEST_TARGET_AMDGPU)
- set(BIN_PATH ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin)
- add_custom_command(OUTPUT ${BIN_PATH}
- COMMAND
- ${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
- ${ARGN}
- -mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
- ${SRC_PATH} -o ${BIN_PATH}
- DEPENDS ${SRC_PATH}
- )
- list(APPEND BIN_PATHS ${BIN_PATH})
- endif()
-
- # TODO: Build for host CPU
-endmacro()
-
-
-# Decide what device targets to build for. LibomptargetGetDependencies is
-# included at the top-level so the GPUs present on the system are already
-# detected.
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
- "Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
-set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
- "Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
-
-find_package(CUDAToolkit QUIET)
-if(CUDAToolkit_FOUND)
- get_filename_component(CUDA_ROOT "${CUDAToolkit_BIN_DIR}" DIRECTORY ABSOLUTE)
-endif()
-if (OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
- set(LIBOMPTARGET_DEP_CUDA_ARCH ${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH})
- set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-elseif (LIBOMPTARGET_FOUND_NVIDIA_GPU AND CUDA_ROOT AND "cuda" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
- set(OFFLOAD_TEST_TARGET_NVIDIA ON)
-endif()
-
-if (OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
- set(LIBOMPTARGET_DEP_AMDGPU_ARCH ${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH})
- set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-elseif (LIBOMPTARGET_FOUND_AMDGPU_GPU AND "amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
- list(GET LIBOMPTARGET_AMDGPU_DETECTED_ARCH_LIST 0 LIBOMPTARGET_DEP_AMDGPU_ARCH)
- set(OFFLOAD_TEST_TARGET_AMDGPU ON)
-endif()
-
add_offload_test_device_code(foo.c foo)
add_offload_test_device_code(bar.c bar)
-# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
-# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
+# Compile with optimizations to eliminate AMDGPU implicit arguments.
add_offload_test_device_code(noargs.c noargs -O3)
-add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
-
+add_custom_target(offload_device_binaries DEPENDS foo.bin bar.bin noargs.bin)
set(OFFLOAD_TEST_DEVICE_CODE_PATH ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
>From 19c1b1a8e6899b5c6b457132f3cea32afa5a44b7 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 20 Jun 2025 07:23:14 -0500
Subject: [PATCH 2/3] fix
---
offload/unittests/CMakeLists.txt | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 4670565ab634d..f30c3ffb494af 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -14,10 +14,10 @@ function(add_offload_test_device_code test_filename test_name)
check_cxx_compiler_flag(
"--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
- if(PLATFORM_HAS_NVPTX)
- set(nvptx_arch "native")
- elseif(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
+ if(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+ elseif(PLATFORM_HAS_NVPTX)
+ set(nvptx_arch "native")
endif()
if(nvptx_arch AND CUDAToolkit_FOUND)
@@ -38,10 +38,10 @@ function(add_offload_test_device_code test_filename test_name)
if("amdgpu" IN_LIST LIBOMPTARGET_PLUGINS_TO_BUILD)
check_cxx_compiler_flag("--target=amdgcn-amd-amdhsa -mcpu=native" PLATFORM_HAS_AMDGPU)
- if(PLATFORM_HAS_AMDGPU)
- set(amdgpu_arch "native")
- elseif(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
+ if(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH)
set(amdgpu_arch "${OFFLOAD_TESTS_FORCE_AMDGPU_ARCH}")
+ elseif(PLATFORM_HAS_AMDGPU)
+ set(amdgpu_arch "native")
endif()
if(amdgpu_arch)
>From a6515838b270f688a3f51b0887987e60a181e9ae Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Fri, 20 Jun 2025 10:24:28 -0500
Subject: [PATCH 3/3] rename
---
offload/unittests/CMakeLists.txt | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index f30c3ffb494af..7cd41e1dcdafd 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -14,8 +14,8 @@ function(add_offload_test_device_code test_filename test_name)
check_cxx_compiler_flag(
"--target=nvptx64-nvidia-cuda -march=native --cuda-path=${cuda_path}" PLATFORM_HAS_NVPTX)
- if(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH)
- set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVIDIA_ARCH}")
+ if(OFFLOAD_TESTS_FORCE_NVPTX_ARCH)
+ set(nvptx_arch "${OFFLOAD_TESTS_FORCE_NVPTX_ARCH}")
elseif(PLATFORM_HAS_NVPTX)
set(nvptx_arch "native")
endif()
@@ -81,7 +81,7 @@ function(add_offload_unittest test_dirname)
target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE})
endfunction()
-set(OFFLOAD_TESTS_FORCE_NVIDIA_ARCH "" CACHE STRING
+set(OFFLOAD_TESTS_FORCE_NVPTX_ARCH "" CACHE STRING
"Force building of NVPTX device code for Offload unit tests with the given arch, e.g. sm_61")
set(OFFLOAD_TESTS_FORCE_AMDGPU_ARCH "" CACHE STRING
"Force building of AMDGPU device code for Offload unit tests with the given arch, e.g. gfx1030")
More information about the llvm-commits
mailing list