[llvm] [Offload][Conformance] Add support for CUDA Math and HIP Math providers (PR #152362)
Leandro Lacerda via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 11:38:22 PDT 2025
https://github.com/leandrolcampos created https://github.com/llvm/llvm-project/pull/152362
This patch extends the conformance testing infrastructure to support two new providers of math function implementations for GPUs: CUDA Math (`cuda-math`) and HIP Math (`hip-math`).
>From 11b056478e5b6130cf908f418874f36e0821c6c1 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 6 Aug 2025 14:57:31 -0300
Subject: [PATCH 1/2] Redirect test preamble output to `stderr`
---
offload/unittests/Conformance/include/mathtest/TestRunner.hpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
index f89d151d0161e..ab17f1d83768a 100644
--- a/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
+++ b/offload/unittests/Conformance/include/mathtest/TestRunner.hpp
@@ -41,11 +41,11 @@ void printPreamble(const TestConfig &Config, size_t Index,
size_t Total) noexcept {
using FunctionConfig = FunctionConfig<Func>;
- llvm::outs() << "[" << (Index + 1) << "/" << Total << "] "
+ llvm::errs() << "[" << (Index + 1) << "/" << Total << "] "
<< "Running conformance test '" << FunctionConfig::Name
<< "' with '" << Config.Provider << "' on '" << Config.Platform
<< "'\n";
- llvm::outs().flush();
+ llvm::errs().flush();
}
template <typename T>
>From af1a72a3ec40164fb2ea8edbfddf498c0b848371 Mon Sep 17 00:00:00 2001
From: Leandro Augusto Lacerda Campos <leandrolcampos at yahoo.com.br>
Date: Wed, 6 Aug 2025 15:26:09 -0300
Subject: [PATCH 2/2] Add support for `cuda-math` and `hip-math` providers
---
.../Conformance/device_code/CMakeLists.txt | 28 ++-
.../Conformance/device_code/CUDAMath.cpp | 178 ++++++++++++++++++
.../Conformance/device_code/DeviceAPIs.hpp | 113 +++++++++++
.../Conformance/device_code/HIPMath.cpp | 178 ++++++++++++++++++
.../{Common.hpp => KernelRunner.hpp} | 16 +-
.../Conformance/device_code/LLVMLibm.cpp | 5 +-
6 files changed, 506 insertions(+), 12 deletions(-)
create mode 100644 offload/unittests/Conformance/device_code/CUDAMath.cpp
create mode 100644 offload/unittests/Conformance/device_code/DeviceAPIs.hpp
create mode 100644 offload/unittests/Conformance/device_code/HIPMath.cpp
rename offload/unittests/Conformance/device_code/{Common.hpp => KernelRunner.hpp} (70%)
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 789dd167bb9ff..992f54c0c2376 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,4 +1,30 @@
+set(cuda_math_flags "")
+find_package(CUDAToolkit QUIET)
+if(CUDAToolkit_FOUND)
+ file(GLOB libdevice_paths "${CUDAToolkit_LIBRARY_ROOT}/nvvm/libdevice/libdevice.*.bc")
+ list(GET libdevice_paths 0 libdevice_path)
+
+ if (EXISTS ${libdevice_path})
+ list(APPEND cuda_math_flags "-Xclang" "-mlink-builtin-bitcode" "-Xclang" "${libdevice_path}")
+ list(APPEND cuda_math_flags "-DCUDA_MATH_FOUND=1")
+ endif()
+endif()
+
+set(hip_math_flags "")
+find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(AMDDeviceLibs_FOUND)
+ get_target_property(ocml_path ocml IMPORTED_LOCATION)
+ list(APPEND hip_math_flags "-Xclang" "-mlink-builtin-bitcode" "-Xclang" "${ocml_path}")
+ list(APPEND hip_math_flags "-DHIP_MATH_FOUND=1")
+endif()
+
+add_offload_test_device_code(CUDAMath.cpp cuda-math -O3 -stdlib -fno-builtin ${cuda_math_flags})
+add_offload_test_device_code(HIPMath.cpp hip-math -O3 -stdlib -fno-builtin ${hip_math_flags})
add_offload_test_device_code(LLVMLibm.cpp llvm-libm -O3 -stdlib -fno-builtin)
-add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
+add_custom_target(conformance_device_binaries DEPENDS
+ cuda-math.bin
+ hip-math.bin
+ llvm-libm.bin
+)
set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp
new file mode 100644
index 0000000000000..a351e924b8f89
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@@ -0,0 +1,178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the device kernels that wrap the
+/// math functions from the cuda-math provider.
+///
+//===----------------------------------------------------------------------===//
+
+#ifdef CUDA_MATH_FOUND
+
+#include "Conformance/device_code/DeviceAPIs.hpp"
+#include "Conformance/device_code/KernelRunner.hpp"
+
+#include <gpuintrin.h>
+#include <stddef.h>
+
+using namespace kernels;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+static inline float sincosfSin(float X) {
+ float SinX, CosX;
+ __nv_sincosf(X, &SinX, &CosX);
+ return SinX;
+}
+
+static inline float sincosfCos(float X) {
+ float SinX, CosX;
+ __nv_sincosf(X, &SinX, &CosX);
+ return CosX;
+}
+
+//===----------------------------------------------------------------------===//
+// Kernels
+//===----------------------------------------------------------------------===//
+
+extern "C" {
+
+__gpu_kernel void acosfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_acosf>(NumElements, Out, X);
+}
+
+__gpu_kernel void acoshfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_acoshf>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_asinf>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_asinhf>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_atanf>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_atanhf>(NumElements, Out, X);
+}
+
+__gpu_kernel void cbrtfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_cbrtf>(NumElements, Out, X);
+}
+
+__gpu_kernel void cosfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_cosf>(NumElements, Out, X);
+}
+
+__gpu_kernel void coshfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_coshf>(NumElements, Out, X);
+}
+
+__gpu_kernel void cospifKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_cospif>(NumElements, Out, X);
+}
+
+__gpu_kernel void erffKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_erff>(NumElements, Out, X);
+}
+
+__gpu_kernel void expfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_expf>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp10fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_exp10f>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp2fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_exp2f>(NumElements, Out, X);
+}
+
+__gpu_kernel void expm1fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_expm1f>(NumElements, Out, X);
+}
+
+__gpu_kernel void logfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_logf>(NumElements, Out, X);
+}
+
+__gpu_kernel void log10fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log10f>(NumElements, Out, X);
+}
+
+__gpu_kernel void log1pfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log1pf>(NumElements, Out, X);
+}
+
+__gpu_kernel void log2fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log2f>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_sinf>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosfSin>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosfCos>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_sinhf>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinpifKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_sinpif>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_tanf>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_tanhf>(NumElements, Out, X);
+}
+} // extern "C"
+
+#endif // CUDA_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
new file mode 100644
index 0000000000000..8476dcbeff0c9
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -0,0 +1,113 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains platform-specific definitions and forward declarations
+/// for device-side APIs used by the kernels.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef CONFORMANCE_DEVICE_CODE_DEVICEAPIS_HPP
+#define CONFORMANCE_DEVICE_CODE_DEVICEAPIS_HPP
+
+#include <stdint.h>
+
+typedef _Float16 float16;
+
+#ifdef __AMDGPU__
+
+// The ROCm device library uses control globals to alter codegen for the
+// different targets. To avoid needing to link them in manually, we simply
+// define them here.
+extern "C" {
+extern const inline uint8_t __oclc_unsafe_math_opt = 0;
+extern const inline uint8_t __oclc_daz_opt = 0;
+extern const inline uint8_t __oclc_correctly_rounded_sqrt32 = 1;
+extern const inline uint8_t __oclc_finite_only_opt = 0;
+extern const inline uint32_t __oclc_ISA_version = 9000;
+}
+
+// These aliases cause Clang to emit the control constants with ODR linkage.
+// This allows us to link against the symbols without preventing them from being
+// optimized out or causing symbol collisions.
+[[gnu::alias("__oclc_unsafe_math_opt")]] const uint8_t __oclc_unsafe_math_opt__;
+[[gnu::alias("__oclc_daz_opt")]] const uint8_t __oclc_daz_opt__;
+[[gnu::alias("__oclc_correctly_rounded_sqrt32")]] const uint8_t
+ __oclc_correctly_rounded_sqrt32__;
+[[gnu::alias("__oclc_finite_only_opt")]] const uint8_t __oclc_finite_only_opt__;
+[[gnu::alias("__oclc_ISA_version")]] const uint32_t __oclc_ISA_version__;
+
+#endif // __AMDGPU__
+
+#ifdef CUDA_MATH_FOUND
+
+extern "C" {
+
+float __nv_acosf(float);
+float __nv_acoshf(float);
+float __nv_asinf(float);
+float __nv_asinhf(float);
+float __nv_atanf(float);
+float __nv_atanhf(float);
+float __nv_cbrtf(float);
+float __nv_cosf(float);
+float __nv_coshf(float);
+float __nv_cospif(float);
+float __nv_erff(float);
+float __nv_expf(float);
+float __nv_exp10f(float);
+float __nv_exp2f(float);
+float __nv_expm1f(float);
+float __nv_logf(float);
+float __nv_log10f(float);
+float __nv_log1pf(float);
+float __nv_log2f(float);
+float __nv_sinf(float);
+void __nv_sincosf(float, float *, float *);
+float __nv_sinhf(float);
+float __nv_sinpif(float);
+float __nv_tanf(float);
+float __nv_tanhf(float);
+} // extern "C"
+
+#endif // CUDA_MATH_FOUND
+
+#ifdef HIP_MATH_FOUND
+
+extern "C" {
+
+float __ocml_acos_f32(float);
+float __ocml_acosh_f32(float);
+float __ocml_asin_f32(float);
+float __ocml_asinh_f32(float);
+float __ocml_atan_f32(float);
+float __ocml_atanh_f32(float);
+float __ocml_cbrt_f32(float);
+float __ocml_cos_f32(float);
+float __ocml_cosh_f32(float);
+float __ocml_cospi_f32(float);
+float __ocml_erf_f32(float);
+float __ocml_exp_f32(float);
+float __ocml_exp10_f32(float);
+float __ocml_exp2_f32(float);
+float __ocml_expm1_f32(float);
+float __ocml_log_f32(float);
+float __ocml_log10_f32(float);
+float __ocml_log1p_f32(float);
+float __ocml_log2_f32(float);
+float __ocml_sin_f32(float);
+float __ocml_sincos_f32(float, float *);
+float __ocml_sinh_f32(float);
+float __ocml_sinpi_f32(float);
+float __ocml_tan_f32(float);
+float __ocml_tanh_f32(float);
+} // extern "C"
+
+#endif // HIP_MATH_FOUND
+
+#endif // CONFORMANCE_DEVICE_CODE_DEVICEAPIS_HPP
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
new file mode 100644
index 0000000000000..36efe6b2696ab
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -0,0 +1,178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the device kernels that wrap the
+/// math functions from the hip-math provider.
+///
+//===----------------------------------------------------------------------===//
+
+#ifdef HIP_MATH_FOUND
+
+#include "Conformance/device_code/DeviceAPIs.hpp"
+#include "Conformance/device_code/KernelRunner.hpp"
+
+#include <gpuintrin.h>
+#include <stddef.h>
+
+using namespace kernels;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+static inline float sincosfSin(float X) {
+ float CosX;
+ float SinX = __ocml_sincos_f32(X, &CosX);
+ return SinX;
+}
+
+static inline float sincosfCos(float X) {
+ float CosX;
+ float SinX = __ocml_sincos_f32(X, &CosX);
+ return CosX;
+}
+
+//===----------------------------------------------------------------------===//
+// Kernels
+//===----------------------------------------------------------------------===//
+
+extern "C" {
+
+__gpu_kernel void acosfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void acoshfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void cbrtfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cbrt_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void cosfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void coshfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void cospifKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void erffKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_erf_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void expfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp10fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp2fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void expm1fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void logfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void log10fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void log1pfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log1p_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void log2fKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosfSin>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosfCos>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinpifKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanhfKernel(const float *X, float *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
+}
+} // extern "C"
+
+#endif // HIP_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/Common.hpp b/offload/unittests/Conformance/device_code/KernelRunner.hpp
similarity index 70%
rename from offload/unittests/Conformance/device_code/Common.hpp
rename to offload/unittests/Conformance/device_code/KernelRunner.hpp
index bcf3ac617b54c..e64a62fbdf018 100644
--- a/offload/unittests/Conformance/device_code/Common.hpp
+++ b/offload/unittests/Conformance/device_code/KernelRunner.hpp
@@ -7,21 +7,19 @@
//===----------------------------------------------------------------------===//
///
/// \file
-/// This file contains common utilities for defining device kernel wrappers to
-/// math functions.
+/// This file contains the definition of `runKernelBody`, a template helper
+/// that executes the per-thread logic of a math function's kernel wrapper.
///
//===----------------------------------------------------------------------===//
-#ifndef CONFORMANCE_DEVICE_CODE_COMMON_HPP
-#define CONFORMANCE_DEVICE_CODE_COMMON_HPP
+#ifndef CONFORMANCE_DEVICE_CODE_KERNELRUNNER_HPP
+#define CONFORMANCE_DEVICE_CODE_KERNELRUNNER_HPP
#include <gpuintrin.h>
#include <stddef.h>
#include <stdint.h>
-namespace common {
-
-typedef _Float16 float16;
+namespace kernels {
template <auto Func, typename OutType, typename... InTypes>
void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
@@ -32,6 +30,6 @@ void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
Out[Index] = Func(Ins[Index]...);
}
}
-} // namespace common
+} // namespace kernels
-#endif // CONFORMANCE_DEVICE_CODE_COMMON_HPP
+#endif // CONFORMANCE_DEVICE_CODE_KERNELRUNNER_HPP
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
index f137ba3d23752..8869d87017486 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -12,13 +12,14 @@
///
//===----------------------------------------------------------------------===//
-#include "Conformance/device_code/Common.hpp"
+#include "Conformance/device_code/DeviceAPIs.hpp"
+#include "Conformance/device_code/KernelRunner.hpp"
#include <gpuintrin.h>
#include <math.h>
#include <stddef.h>
-using namespace common;
+using namespace kernels;
//===----------------------------------------------------------------------===//
// Helpers
More information about the llvm-commits
mailing list