[llvm] [Offload][Conformance] Add randomized tests for double-precision math functions (PR #155003)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 11:14:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Leandro Lacerda (leandrolcampos)
<details>
<summary>Changes</summary>
This patch adds a set of randomized conformance tests for double-precision math functions.
The functions included in this set were selected based on the following criteria:
- An implementation exists in `libc/src/math/generic` (i.e., it is not just a wrapper around a compiler built-in).
- The corresponding LLVM CPU libm implementation is correctly rounded.
- The function is listed in Table 68 of the OpenCL C Specification v3.0.19.
---
Patch is 62.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155003.diff
20 Files Affected:
- (modified) offload/unittests/Conformance/device_code/CUDAMath.cpp (+93-1)
- (modified) offload/unittests/Conformance/device_code/DeviceAPIs.hpp (+30)
- (modified) offload/unittests/Conformance/device_code/HIPMath.cpp (+93-1)
- (modified) offload/unittests/Conformance/device_code/LLVMLibm.cpp (+94-2)
- (added) offload/unittests/Conformance/tests/AcosTest.cpp (+63)
- (added) offload/unittests/Conformance/tests/AsinTest.cpp (+63)
- (modified) offload/unittests/Conformance/tests/CMakeLists.txt (+15)
- (added) offload/unittests/Conformance/tests/CbrtTest.cpp (+63)
- (added) offload/unittests/Conformance/tests/CosTest.cpp (+63)
- (added) offload/unittests/Conformance/tests/Exp10Test.cpp (+64)
- (added) offload/unittests/Conformance/tests/Exp2Test.cpp (+63)
- (added) offload/unittests/Conformance/tests/ExpTest.cpp (+63)
- (added) offload/unittests/Conformance/tests/Expm1Test.cpp (+64)
- (added) offload/unittests/Conformance/tests/HypotTest.cpp (+65)
- (added) offload/unittests/Conformance/tests/Log10Test.cpp (+67)
- (added) offload/unittests/Conformance/tests/Log1pTest.cpp (+67)
- (added) offload/unittests/Conformance/tests/Log2Test.cpp (+66)
- (added) offload/unittests/Conformance/tests/SinTest.cpp (+63)
- (added) offload/unittests/Conformance/tests/SincosTest.cpp (+80)
- (added) offload/unittests/Conformance/tests/TanTest.cpp (+63)
``````````diff
diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp
index d47607a7c862e..d80660b2e3c74 100644
--- a/offload/unittests/Conformance/device_code/CUDAMath.cpp
+++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@@ -30,6 +30,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) {
return __nv_powf(Base, __nv_roundf(Exponent));
}
+static inline double sincosSin(double X) {
+ double SinX, CosX;
+ __nv_sincos(X, &SinX, &CosX);
+ return SinX;
+}
+
+static inline double sincosCos(double X) {
+ double SinX, CosX;
+ __nv_sincos(X, &SinX, &CosX);
+ return CosX;
+}
+
static inline float sincosfSin(float X) {
float SinX, CosX;
__nv_sincosf(X, &SinX, &CosX);
@@ -48,6 +60,11 @@ static inline float sincosfCos(float X) {
extern "C" {
+__gpu_kernel void acosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_acos>(NumElements, Out, X);
+}
+
__gpu_kernel void acosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_acosf>(NumElements, Out, X);
@@ -58,6 +75,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out,
runKernelBody<__nv_acoshf>(NumElements, Out, X);
}
+__gpu_kernel void asinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_asin>(NumElements, Out, X);
+}
+
__gpu_kernel void asinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_asinf>(NumElements, Out, X);
@@ -83,11 +105,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<__nv_atanhf>(NumElements, Out, X);
}
+__gpu_kernel void cbrtKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_cbrt>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_cbrtf>(NumElements, Out, X);
}
+__gpu_kernel void cosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_cos>(NumElements, Out, X);
+}
+
__gpu_kernel void cosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_cosf>(NumElements, Out, X);
@@ -108,27 +140,52 @@ __gpu_kernel void erffKernel(const float *X, float *Out,
runKernelBody<__nv_erff>(NumElements, Out, X);
}
+__gpu_kernel void expKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_exp>(NumElements, Out, X);
+}
+
__gpu_kernel void expfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_expf>(NumElements, Out, X);
}
+__gpu_kernel void exp10Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_exp10>(NumElements, Out, X);
+}
+
__gpu_kernel void exp10fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_exp10f>(NumElements, Out, X);
}
+__gpu_kernel void exp2Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_exp2>(NumElements, Out, X);
+}
+
__gpu_kernel void exp2fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_exp2f>(NumElements, Out, X);
}
+__gpu_kernel void expm1Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_expm1>(NumElements, Out, X);
+}
+
__gpu_kernel void expm1fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_expm1f>(NumElements, Out, X);
}
-__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out,
+__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_hypot>(NumElements, Out, X, Y);
+}
+
+__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_hypotf>(NumElements, Out, X, Y);
}
@@ -143,16 +200,31 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
runKernelBody<__nv_logf>(NumElements, Out, X);
}
+__gpu_kernel void log10Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log10>(NumElements, Out, X);
+}
+
__gpu_kernel void log10fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_log10f>(NumElements, Out, X);
}
+__gpu_kernel void log1pKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log1p>(NumElements, Out, X);
+}
+
__gpu_kernel void log1pfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_log1pf>(NumElements, Out, X);
}
+__gpu_kernel void log2Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_log2>(NumElements, Out, X);
+}
+
__gpu_kernel void log2fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_log2f>(NumElements, Out, X);
@@ -169,11 +241,26 @@ __gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y,
runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
}
+__gpu_kernel void sinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_sin>(NumElements, Out, X);
+}
+
__gpu_kernel void sinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_sinf>(NumElements, Out, X);
}
+__gpu_kernel void sincosSinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosSin>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosCosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosCos>(NumElements, Out, X);
+}
+
__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sincosfSin>(NumElements, Out, X);
@@ -194,6 +281,11 @@ __gpu_kernel void sinpifKernel(const float *X, float *Out,
runKernelBody<__nv_sinpif>(NumElements, Out, X);
}
+__gpu_kernel void tanKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__nv_tan>(NumElements, Out, X);
+}
+
__gpu_kernel void tanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__nv_tanf>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
index 6504fff125640..32f21991d9ec3 100644
--- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -48,34 +48,49 @@ extern const inline uint32_t __oclc_ISA_version = 9000;
extern "C" {
+double __nv_acos(double);
float __nv_acosf(float);
float __nv_acoshf(float);
+double __nv_asin(double);
float __nv_asinf(float);
float __nv_asinhf(float);
float __nv_atanf(float);
float __nv_atan2f(float, float);
float __nv_atanhf(float);
+double __nv_cbrt(double);
float __nv_cbrtf(float);
+double __nv_cos(double);
float __nv_cosf(float);
float __nv_coshf(float);
float __nv_cospif(float);
float __nv_erff(float);
+double __nv_exp(double);
float __nv_expf(float);
+double __nv_exp10(double);
float __nv_exp10f(float);
+double __nv_exp2(double);
float __nv_exp2f(float);
+double __nv_expm1(double);
float __nv_expm1f(float);
+double __nv_hypot(double, double);
float __nv_hypotf(float, float);
double __nv_log(double);
float __nv_logf(float);
+double __nv_log10(double);
float __nv_log10f(float);
+double __nv_log1p(double);
float __nv_log1pf(float);
+double __nv_log2(double);
float __nv_log2f(float);
float __nv_powf(float, float);
float __nv_roundf(float);
+double __nv_sin(double);
float __nv_sinf(float);
+void __nv_sincos(double, double *, double *);
void __nv_sincosf(float, float *, float *);
float __nv_sinhf(float);
float __nv_sinpif(float);
+double __nv_tan(double);
float __nv_tanf(float);
float __nv_tanhf(float);
} // extern "C"
@@ -86,34 +101,49 @@ float __nv_tanhf(float);
extern "C" {
+double __ocml_acos_f64(double);
float __ocml_acos_f32(float);
float __ocml_acosh_f32(float);
+double __ocml_asin_f64(double);
float __ocml_asin_f32(float);
float __ocml_asinh_f32(float);
float __ocml_atan_f32(float);
float __ocml_atan2_f32(float, float);
float __ocml_atanh_f32(float);
+double __ocml_cbrt_f64(double);
float __ocml_cbrt_f32(float);
+double __ocml_cos_f64(double);
float __ocml_cos_f32(float);
float __ocml_cosh_f32(float);
float __ocml_cospi_f32(float);
float __ocml_erf_f32(float);
+double __ocml_exp_f64(double);
float __ocml_exp_f32(float);
+double __ocml_exp10_f64(double);
float __ocml_exp10_f32(float);
+double __ocml_exp2_f64(double);
float __ocml_exp2_f32(float);
+double __ocml_expm1_f64(double);
float __ocml_expm1_f32(float);
+double __ocml_hypot_f64(double, double);
float __ocml_hypot_f32(float, float);
double __ocml_log_f64(double);
float __ocml_log_f32(float);
+double __ocml_log10_f64(double);
float __ocml_log10_f32(float);
+double __ocml_log1p_f64(double);
float __ocml_log1p_f32(float);
+double __ocml_log2_f64(double);
float __ocml_log2_f32(float);
float __ocml_pow_f32(float, float);
float __ocml_round_f32(float);
+double __ocml_sin_f64(double);
float __ocml_sin_f32(float);
+double __ocml_sincos_f64(double, double *);
float __ocml_sincos_f32(float, float *);
float __ocml_sinh_f32(float);
float __ocml_sinpi_f32(float);
+double __ocml_tan_f64(double);
float __ocml_tan_f32(float);
float __ocml_tanh_f32(float);
} // extern "C"
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
index 74a7f5c3a9492..71dea4c8d2656 100644
--- a/offload/unittests/Conformance/device_code/HIPMath.cpp
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -30,6 +30,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) {
return __ocml_pow_f32(Base, __ocml_round_f32(Exponent));
}
+static inline double sincosSin(double X) {
+ double CosX;
+ double SinX = __ocml_sincos_f64(X, &CosX);
+ return SinX;
+}
+
+static inline double sincosCos(double X) {
+ double CosX;
+ double SinX = __ocml_sincos_f64(X, &CosX);
+ return CosX;
+}
+
static inline float sincosfSin(float X) {
float CosX;
float SinX = __ocml_sincos_f32(X, &CosX);
@@ -48,6 +60,11 @@ static inline float sincosfCos(float X) {
extern "C" {
+__gpu_kernel void acosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acos_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void acosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
@@ -58,6 +75,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out,
runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
}
+__gpu_kernel void asinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asin_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void asinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
@@ -83,11 +105,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
}
+__gpu_kernel void cbrtKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cbrt_f32>(NumElements, Out, X);
}
+__gpu_kernel void cosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cos_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void cosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
@@ -108,27 +140,52 @@ __gpu_kernel void erffKernel(const float *X, float *Out,
runKernelBody<__ocml_erf_f32>(NumElements, Out, X);
}
+__gpu_kernel void expKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void expfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
}
+__gpu_kernel void exp10Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp10_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void exp10fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
}
+__gpu_kernel void exp2Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp2_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void exp2fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
}
+__gpu_kernel void expm1Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_expm1_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void expm1fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
}
-__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out,
+__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y);
+}
+
+__gpu_kernel void hypotfKernel(const float *X, const float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_hypot_f32>(NumElements, Out, X, Y);
}
@@ -143,16 +200,31 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
}
+__gpu_kernel void log10Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log10_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void log10fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
}
+__gpu_kernel void log1pKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log1p_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void log1pfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log1p_f32>(NumElements, Out, X);
}
+__gpu_kernel void log2Kernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log2_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void log2fKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
@@ -169,11 +241,26 @@ __gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y,
runKernelBody<powfRoundedExponent>(NumElements, Out, X, Y);
}
+__gpu_kernel void sinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sin_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void sinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
}
+__gpu_kernel void sincosSinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosSin>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosCosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sincosCos>(NumElements, Out, X);
+}
+
__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sincosfSin>(NumElements, Out, X);
@@ -194,6 +281,11 @@ __gpu_kernel void sinpifKernel(const float *X, float *Out,
runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
}
+__gpu_kernel void tanKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tan_f64>(NumElements, Out, X);
+}
+
__gpu_kernel void tanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
index 20ad796c6d172..e25f8e1c6c042 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -29,6 +29,18 @@ static inline float powfRoundedExponent(float Base, float Exponent) {
return powf(Base, roundf(Exponent));
}
+static inline double sincosSin(double X) {
+ double SinX, CosX;
+ sincos(X, &SinX, &CosX);
+ return SinX;
+}
+
+static inline double sincosCos(double X) {
+ double SinX, CosX;
+ sincos(X, &SinX, &CosX);
+ return CosX;
+}
+
static inline float sincosfSin(float X) {
float SinX, CosX;
sincosf(X, &SinX, &CosX);
@@ -47,6 +59,11 @@ static inline float sincosfCos(float X) {
extern "C" {
+__gpu_kernel void acosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acos>(NumElements, Out, X);
+}
+
__gpu_kernel void acosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<acosf>(NumElements, Out, X);
@@ -57,6 +74,11 @@ __gpu_kernel void acoshfKernel(const float *X, float *Out,
runKernelBody<acoshf>(NumElements, Out, X);
}
+__gpu_kernel void asinKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<asin>(NumElements, Out, X);
+}
+
__gpu_kernel void asinfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<asinf>(NumElements, Out, X);
@@ -82,11 +104,21 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<atanhf>(NumElements, Out, X);
}
+__gpu_kernel void cbrtKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<cbrt>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cbrtf>(NumElements, Out, X);
}
+__gpu_kernel void cosKernel(const double *X, double *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<cos>(NumElements, Out, X);
+}
+
__gpu_kernel void cosfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cosf>(NumElements, Out, X);
@@ -107,32 +139,57 @@ __gpu_kernel void erffKernel(const float *...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155003
More information about the llvm-commits mailing list