[llvm] [Offload][UnitTests] Build device code as C++ (PR #151714)
Joseph Huber via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 1 08:44:17 PDT 2025
https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/151714
From fef9db10585868da7da5d8d45d7a8ebc643e8617 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Fri, 1 Aug 2025 12:31:56 -0300
Subject: [PATCH] Build device code as C++
---
offload/unittests/CMakeLists.txt | 4 ++--
.../Conformance/device_code/CMakeLists.txt | 2 +-
.../device_code/{LLVMLibm.c => LLVMLibm.cpp} | 3 +++
.../OffloadAPI/device_code/CMakeLists.txt | 20 +++++++++----------
.../OffloadAPI/device_code/{bar.c => bar.cpp} | 2 +-
.../OffloadAPI/device_code/{foo.c => foo.cpp} | 2 +-
.../device_code/{global.c => global.cpp} | 3 +++
.../{global_ctor.c => global_ctor.cpp} | 3 +++
.../{global_dtor.c => global_dtor.cpp} | 3 +++
.../device_code/{localmem.c => localmem.cpp} | 2 +-
...mem_reduction.c => localmem_reduction.cpp} | 2 +-
...{localmem_static.c => localmem_static.cpp} | 2 +-
.../unittests/OffloadAPI/device_code/noargs.c | 3 ---
.../OffloadAPI/device_code/noargs.cpp | 3 +++
.../device_code/{sequence.c => sequence.cpp} | 2 +-
15 files changed, 34 insertions(+), 22 deletions(-)
rename offload/unittests/Conformance/device_code/{LLVMLibm.c => LLVMLibm.cpp} (97%)
rename offload/unittests/OffloadAPI/device_code/{bar.c => bar.cpp} (63%)
rename offload/unittests/OffloadAPI/device_code/{foo.c => foo.cpp} (65%)
rename offload/unittests/OffloadAPI/device_code/{global.c => global.cpp} (92%)
rename offload/unittests/OffloadAPI/device_code/{global_ctor.c => global_ctor.cpp} (95%)
rename offload/unittests/OffloadAPI/device_code/{global_dtor.c => global_dtor.cpp} (87%)
rename offload/unittests/OffloadAPI/device_code/{localmem.c => localmem.cpp} (84%)
rename offload/unittests/OffloadAPI/device_code/{localmem_reduction.c => localmem_reduction.cpp} (83%)
rename offload/unittests/OffloadAPI/device_code/{localmem_static.c => localmem_static.cpp} (85%)
delete mode 100644 offload/unittests/OffloadAPI/device_code/noargs.c
create mode 100644 offload/unittests/OffloadAPI/device_code/noargs.cpp
rename offload/unittests/OffloadAPI/device_code/{sequence.c => sequence.cpp} (71%)
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
index 6d165ffd4c53a..1571658c7006b 100644
--- a/offload/unittests/CMakeLists.txt
+++ b/offload/unittests/CMakeLists.txt
@@ -38,7 +38,7 @@ function(add_offload_test_device_code test_filename test_name)
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.nvptx64.bin")
add_custom_command(
OUTPUT ${output_file}
- COMMAND ${CMAKE_C_COMPILER}
+ COMMAND ${CMAKE_CXX_COMPILER}
--target=nvptx64-nvidia-cuda -march=${nvptx_arch}
-nogpulib --cuda-path=${CUDA_ROOT} -flto ${ARGN}
${SRC_PATH} -o ${output_file}
@@ -62,7 +62,7 @@ function(add_offload_test_device_code test_filename test_name)
set(output_file "${CMAKE_CURRENT_BINARY_DIR}/${test_name}.amdgpu.bin")
add_custom_command(
OUTPUT ${output_file}
- COMMAND ${CMAKE_C_COMPILER}
+ COMMAND ${CMAKE_CXX_COMPILER}
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
-nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
DEPENDS ${SRC_PATH}
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 18f54b8dc5252..9cbd11096292c 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_offload_test_device_code(LLVMLibm.c llvm-libm -stdlib -fno-builtin)
+add_offload_test_device_code(LLVMLibm.cpp llvm-libm -stdlib -fno-builtin)
add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.c b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
similarity index 97%
rename from offload/unittests/Conformance/device_code/LLVMLibm.c
rename to offload/unittests/Conformance/device_code/LLVMLibm.cpp
index fe5196a539455..2c3d9bc5bf5cf 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.c
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -19,6 +19,8 @@
typedef _Float16 float16;
+extern "C" {
+
__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
size_t NumElements) {
uint32_t Index =
@@ -35,3 +37,4 @@ __gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
if (Index < NumElements)
Out[Index] = logf(X[Index]);
}
+} // extern "C"
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 0e4695ee9969f..50e430597e646 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -1,14 +1,14 @@
-add_offload_test_device_code(foo.c foo)
-add_offload_test_device_code(bar.c bar)
+add_offload_test_device_code(foo.cpp foo)
+add_offload_test_device_code(bar.cpp bar)
# Compile with optimizations to eliminate AMDGPU implicit arguments.
-add_offload_test_device_code(noargs.c noargs -O3)
-add_offload_test_device_code(localmem.c localmem)
-add_offload_test_device_code(localmem_reduction.c localmem_reduction)
-add_offload_test_device_code(localmem_static.c localmem_static)
-add_offload_test_device_code(global.c global)
-add_offload_test_device_code(global_ctor.c global_ctor)
-add_offload_test_device_code(global_dtor.c global_dtor)
-add_offload_test_device_code(sequence.c sequence)
+add_offload_test_device_code(noargs.cpp noargs -O3)
+add_offload_test_device_code(localmem.cpp localmem)
+add_offload_test_device_code(localmem_reduction.cpp localmem_reduction)
+add_offload_test_device_code(localmem_static.cpp localmem_static)
+add_offload_test_device_code(global.cpp global)
+add_offload_test_device_code(global_ctor.cpp global_ctor)
+add_offload_test_device_code(global_dtor.cpp global_dtor)
+add_offload_test_device_code(sequence.cpp sequence)
add_custom_target(offload_device_binaries DEPENDS
foo.bin
diff --git a/offload/unittests/OffloadAPI/device_code/bar.c b/offload/unittests/OffloadAPI/device_code/bar.cpp
similarity index 63%
rename from offload/unittests/OffloadAPI/device_code/bar.c
rename to offload/unittests/OffloadAPI/device_code/bar.cpp
index 786aa2f5d61e7..b5191671f293f 100644
--- a/offload/unittests/OffloadAPI/device_code/bar.c
+++ b/offload/unittests/OffloadAPI/device_code/bar.cpp
@@ -1,5 +1,5 @@
#include <gpuintrin.h>
-__gpu_kernel void foo(int *out) {
+extern "C" __gpu_kernel void foo(int *out) {
out[__gpu_thread_id(0)] = __gpu_thread_id(0) + 1;
}
diff --git a/offload/unittests/OffloadAPI/device_code/foo.c b/offload/unittests/OffloadAPI/device_code/foo.cpp
similarity index 65%
rename from offload/unittests/OffloadAPI/device_code/foo.c
rename to offload/unittests/OffloadAPI/device_code/foo.cpp
index 83cdc53cddd8d..cdc20015fc3e2 100644
--- a/offload/unittests/OffloadAPI/device_code/foo.c
+++ b/offload/unittests/OffloadAPI/device_code/foo.cpp
@@ -1,6 +1,6 @@
#include <gpuintrin.h>
#include <stdint.h>
-__gpu_kernel void foo(uint32_t *out) {
+extern "C" __gpu_kernel void foo(uint32_t *out) {
out[__gpu_thread_id(0)] = __gpu_thread_id(0);
}
diff --git a/offload/unittests/OffloadAPI/device_code/global.c b/offload/unittests/OffloadAPI/device_code/global.cpp
similarity index 92%
rename from offload/unittests/OffloadAPI/device_code/global.c
rename to offload/unittests/OffloadAPI/device_code/global.cpp
index 9f27f9424324f..dada16c87766c 100644
--- a/offload/unittests/OffloadAPI/device_code/global.c
+++ b/offload/unittests/OffloadAPI/device_code/global.cpp
@@ -1,6 +1,8 @@
#include <gpuintrin.h>
#include <stdint.h>
+extern "C" {
+
[[gnu::visibility("default")]]
uint32_t global[64];
@@ -13,3 +15,4 @@ __gpu_kernel void read(uint32_t *out) {
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =
global[__gpu_thread_id(0)];
}
+} // extern "C"
diff --git a/offload/unittests/OffloadAPI/device_code/global_ctor.c b/offload/unittests/OffloadAPI/device_code/global_ctor.cpp
similarity index 95%
rename from offload/unittests/OffloadAPI/device_code/global_ctor.c
rename to offload/unittests/OffloadAPI/device_code/global_ctor.cpp
index 27e2d71d7566e..a14f1d59bf950 100644
--- a/offload/unittests/OffloadAPI/device_code/global_ctor.c
+++ b/offload/unittests/OffloadAPI/device_code/global_ctor.cpp
@@ -1,6 +1,8 @@
#include <gpuintrin.h>
#include <stdint.h>
+extern "C" {
+
uint32_t global[64];
[[gnu::constructor(202)]] void ctorc() {
@@ -23,3 +25,4 @@ __gpu_kernel void global_ctor(uint32_t *out) {
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =
global[__gpu_thread_id(0)];
}
+} // extern "C"
diff --git a/offload/unittests/OffloadAPI/device_code/global_dtor.c b/offload/unittests/OffloadAPI/device_code/global_dtor.cpp
similarity index 87%
rename from offload/unittests/OffloadAPI/device_code/global_dtor.c
rename to offload/unittests/OffloadAPI/device_code/global_dtor.cpp
index cadcc19cc296b..6b1f941342b40 100644
--- a/offload/unittests/OffloadAPI/device_code/global_dtor.c
+++ b/offload/unittests/OffloadAPI/device_code/global_dtor.cpp
@@ -1,6 +1,8 @@
#include <gpuintrin.h>
#include <stdint.h>
+extern "C" {
+
uint32_t global[64];
[[gnu::destructor]] void dtor() {
@@ -11,3 +13,4 @@ uint32_t global[64];
__gpu_kernel void global_dtor() {
// no-op
}
+} // extern "C"
diff --git a/offload/unittests/OffloadAPI/device_code/localmem.c b/offload/unittests/OffloadAPI/device_code/localmem.cpp
similarity index 84%
rename from offload/unittests/OffloadAPI/device_code/localmem.c
rename to offload/unittests/OffloadAPI/device_code/localmem.cpp
index d70847900bc43..9542e2cb1d648 100644
--- a/offload/unittests/OffloadAPI/device_code/localmem.c
+++ b/offload/unittests/OffloadAPI/device_code/localmem.cpp
@@ -3,7 +3,7 @@
extern __gpu_local uint32_t shared_mem[];
-__gpu_kernel void localmem(uint32_t *out) {
+extern "C" __gpu_kernel void localmem(uint32_t *out) {
shared_mem[__gpu_thread_id(0)] = __gpu_thread_id(0);
shared_mem[__gpu_thread_id(0)] *= 2;
out[__gpu_thread_id(0) + (__gpu_num_threads(0) * __gpu_block_id(0))] =
diff --git a/offload/unittests/OffloadAPI/device_code/localmem_reduction.c b/offload/unittests/OffloadAPI/device_code/localmem_reduction.cpp
similarity index 83%
rename from offload/unittests/OffloadAPI/device_code/localmem_reduction.c
rename to offload/unittests/OffloadAPI/device_code/localmem_reduction.cpp
index 8a9a46cfb6a11..2c0a3e80b16e7 100644
--- a/offload/unittests/OffloadAPI/device_code/localmem_reduction.c
+++ b/offload/unittests/OffloadAPI/device_code/localmem_reduction.cpp
@@ -3,7 +3,7 @@
extern __gpu_local uint32_t shared_mem[];
-__gpu_kernel void localmem_reduction(uint32_t *out) {
+extern "C" __gpu_kernel void localmem_reduction(uint32_t *out) {
shared_mem[__gpu_thread_id(0)] = 2;
__gpu_sync_threads();
diff --git a/offload/unittests/OffloadAPI/device_code/localmem_static.c b/offload/unittests/OffloadAPI/device_code/localmem_static.cpp
similarity index 85%
rename from offload/unittests/OffloadAPI/device_code/localmem_static.c
rename to offload/unittests/OffloadAPI/device_code/localmem_static.cpp
index 928b48422a0d6..a8dd95473742c 100644
--- a/offload/unittests/OffloadAPI/device_code/localmem_static.c
+++ b/offload/unittests/OffloadAPI/device_code/localmem_static.cpp
@@ -4,7 +4,7 @@
[[clang::loader_uninitialized]]
__gpu_local uint32_t shared_mem[64];
-__gpu_kernel void localmem_static(uint32_t *out) {
+extern "C" __gpu_kernel void localmem_static(uint32_t *out) {
shared_mem[__gpu_thread_id(0)] = 2;
__gpu_sync_threads();
diff --git a/offload/unittests/OffloadAPI/device_code/noargs.c b/offload/unittests/OffloadAPI/device_code/noargs.c
deleted file mode 100644
index 36e609aa26a09..0000000000000
--- a/offload/unittests/OffloadAPI/device_code/noargs.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <gpuintrin.h>
-
-__gpu_kernel void noargs() { (void)0; }
diff --git a/offload/unittests/OffloadAPI/device_code/noargs.cpp b/offload/unittests/OffloadAPI/device_code/noargs.cpp
new file mode 100644
index 0000000000000..58f989c714fed
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/noargs.cpp
@@ -0,0 +1,3 @@
+#include <gpuintrin.h>
+
+extern "C" __gpu_kernel void noargs() { (void)0; }
diff --git a/offload/unittests/OffloadAPI/device_code/sequence.c b/offload/unittests/OffloadAPI/device_code/sequence.cpp
similarity index 71%
rename from offload/unittests/OffloadAPI/device_code/sequence.c
rename to offload/unittests/OffloadAPI/device_code/sequence.cpp
index 7662f2d817496..07f92944346f5 100644
--- a/offload/unittests/OffloadAPI/device_code/sequence.c
+++ b/offload/unittests/OffloadAPI/device_code/sequence.cpp
@@ -1,7 +1,7 @@
#include <gpuintrin.h>
#include <stdint.h>
-__gpu_kernel void sequence(uint32_t idx, uint32_t *inout) {
+extern "C" __gpu_kernel void sequence(uint32_t idx, uint32_t *inout) {
if (idx == 0)
inout[idx] = 0;
else if (idx == 1)
More information about the llvm-commits mailing list.