[llvm] [Offload][Conformance] Add exhaustive tests for half-precision math functions (PR #155112)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 23 09:58:44 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Leandro Lacerda (leandrolcampos)
<details>
<summary>Changes</summary>
This patch adds a set of exhaustive conformance tests for half-precision math functions.
The functions included in this set were selected based on the following criteria:
- An implementation exists in `libc/src/math/generic` (i.e., it is not just a wrapper around a compiler built-in).
- The corresponding LLVM CPU libm implementation is correctly rounded.
- The function is listed in Table 69 of the OpenCL C Specification v3.0.19.
This patch also fixes the testing range of the following functions: `acos`, `acosf`, `asin`, `asinf`, and `log1p`.
---
Patch is 78.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155112.diff
32 Files Affected:
- (modified) offload/unittests/Conformance/device_code/DeviceAPIs.hpp (+19)
- (modified) offload/unittests/Conformance/device_code/HIPMath.cpp (+95)
- (modified) offload/unittests/Conformance/device_code/LLVMLibm.cpp (+115)
- (modified) offload/unittests/Conformance/tests/AcosTest.cpp (+3-1)
- (added) offload/unittests/Conformance/tests/Acosf16Test.cpp (+61)
- (modified) offload/unittests/Conformance/tests/AcosfTest.cpp (+3-1)
- (added) offload/unittests/Conformance/tests/Acoshf16Test.cpp (+62)
- (added) offload/unittests/Conformance/tests/Acospif16Test.cpp (+61)
- (modified) offload/unittests/Conformance/tests/AsinTest.cpp (+3-1)
- (added) offload/unittests/Conformance/tests/Asinf16Test.cpp (+61)
- (modified) offload/unittests/Conformance/tests/AsinfTest.cpp (+3-1)
- (added) offload/unittests/Conformance/tests/Asinhf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Atanf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Atanhf16Test.cpp (+61)
- (modified) offload/unittests/Conformance/tests/CMakeLists.txt (+23)
- (added) offload/unittests/Conformance/tests/Cosf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Coshf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Cospif16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Exp10f16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Exp2f16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Expf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Expm1f16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Log10f16Test.cpp (+62)
- (modified) offload/unittests/Conformance/tests/Log1pTest.cpp (+1-1)
- (added) offload/unittests/Conformance/tests/Log2f16Test.cpp (+62)
- (added) offload/unittests/Conformance/tests/Logf16Test.cpp (+62)
- (added) offload/unittests/Conformance/tests/Sinf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Sinhf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Sinpif16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Tanf16Test.cpp (+61)
- (added) offload/unittests/Conformance/tests/Tanhf16Test.cpp (+59)
- (added) offload/unittests/Conformance/tests/Tanpif16Test.cpp (+59)
``````````diff
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
index 32f21991d9ec3..894652a8e1af1 100644
--- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -103,49 +103,68 @@ extern "C" {
double __ocml_acos_f64(double);
float __ocml_acos_f32(float);
+float16 __ocml_acos_f16(float16);
float __ocml_acosh_f32(float);
+float16 __ocml_acosh_f16(float16);
double __ocml_asin_f64(double);
float __ocml_asin_f32(float);
+float16 __ocml_asin_f16(float16);
float __ocml_asinh_f32(float);
+float16 __ocml_asinh_f16(float16);
float __ocml_atan_f32(float);
+float16 __ocml_atan_f16(float16);
float __ocml_atan2_f32(float, float);
float __ocml_atanh_f32(float);
+float16 __ocml_atanh_f16(float16);
double __ocml_cbrt_f64(double);
float __ocml_cbrt_f32(float);
double __ocml_cos_f64(double);
float __ocml_cos_f32(float);
+float16 __ocml_cos_f16(float16);
float __ocml_cosh_f32(float);
+float16 __ocml_cosh_f16(float16);
float __ocml_cospi_f32(float);
float __ocml_erf_f32(float);
double __ocml_exp_f64(double);
float __ocml_exp_f32(float);
+float16 __ocml_exp_f16(float16);
double __ocml_exp10_f64(double);
float __ocml_exp10_f32(float);
+float16 __ocml_exp10_f16(float16);
double __ocml_exp2_f64(double);
float __ocml_exp2_f32(float);
+float16 __ocml_exp2_f16(float16);
double __ocml_expm1_f64(double);
float __ocml_expm1_f32(float);
+float16 __ocml_expm1_f16(float16);
double __ocml_hypot_f64(double, double);
float __ocml_hypot_f32(float, float);
double __ocml_log_f64(double);
float __ocml_log_f32(float);
+float16 __ocml_log_f16(float16);
double __ocml_log10_f64(double);
float __ocml_log10_f32(float);
+float16 __ocml_log10_f16(float16);
double __ocml_log1p_f64(double);
float __ocml_log1p_f32(float);
double __ocml_log2_f64(double);
float __ocml_log2_f32(float);
+float16 __ocml_log2_f16(float16);
float __ocml_pow_f32(float, float);
float __ocml_round_f32(float);
double __ocml_sin_f64(double);
float __ocml_sin_f32(float);
+float16 __ocml_sin_f16(float16);
double __ocml_sincos_f64(double, double *);
float __ocml_sincos_f32(float, float *);
float __ocml_sinh_f32(float);
+float16 __ocml_sinh_f16(float16);
float __ocml_sinpi_f32(float);
double __ocml_tan_f64(double);
float __ocml_tan_f32(float);
+float16 __ocml_tan_f16(float16);
float __ocml_tanh_f32(float);
+float16 __ocml_tanh_f16(float16);
} // extern "C"
#endif // HIP_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
index 71dea4c8d2656..7cc0ad5d9142e 100644
--- a/offload/unittests/Conformance/device_code/HIPMath.cpp
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -70,11 +70,21 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
}
+__gpu_kernel void acosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acos_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void acoshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
}
+__gpu_kernel void acoshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_acosh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_asin_f64>(NumElements, Out, X);
@@ -85,16 +95,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
}
+__gpu_kernel void asinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asin_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
}
+__gpu_kernel void asinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_asinh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void atanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
}
+__gpu_kernel void atanf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_atan_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y);
@@ -105,6 +130,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
}
+__gpu_kernel void atanhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_atanh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cbrt_f64>(NumElements, Out, X);
@@ -125,11 +155,21 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
}
+__gpu_kernel void cosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cos_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void coshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
}
+__gpu_kernel void coshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_cosh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void cospifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
@@ -150,6 +190,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
}
+__gpu_kernel void expf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp10_f64>(NumElements, Out, X);
@@ -160,6 +205,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
}
+__gpu_kernel void exp10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp10_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp2Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_exp2_f64>(NumElements, Out, X);
@@ -170,6 +220,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
}
+__gpu_kernel void exp2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_exp2_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void expm1Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_expm1_f64>(NumElements, Out, X);
@@ -180,6 +235,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
}
+__gpu_kernel void expm1f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_expm1_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_hypot_f64>(NumElements, Out, X, Y);
@@ -200,6 +260,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
runKernelBody<__ocml_log_f32>(NumElements, Out, X);
}
+__gpu_kernel void logf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void log10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log10_f64>(NumElements, Out, X);
@@ -210,6 +275,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
}
+__gpu_kernel void log10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log10_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void log1pKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_log1p_f64>(NumElements, Out, X);
@@ -230,6 +300,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
}
+__gpu_kernel void log2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_log2_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void powfKernel(const float *X, float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y);
@@ -251,6 +326,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
}
+__gpu_kernel void sinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sin_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void sincosSinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<sincosSin>(NumElements, Out, X);
@@ -276,6 +356,11 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
}
+__gpu_kernel void sinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_sinh_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void sinpifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
@@ -291,10 +376,20 @@ __gpu_kernel void tanfKernel(const float *X, float *Out,
runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
}
+__gpu_kernel void tanf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tan_f16>(NumElements, Out, X);
+}
+
__gpu_kernel void tanhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
}
+
+__gpu_kernel void tanhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<__ocml_tanh_f16>(NumElements, Out, X);
+}
} // extern "C"
#endif // HIP_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
index baf23a3467f35..8673d809fd0a2 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -69,11 +69,26 @@ __gpu_kernel void acosfKernel(const float *X, float *Out,
runKernelBody<acosf>(NumElements, Out, X);
}
+__gpu_kernel void acosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acosf16>(NumElements, Out, X);
+}
+
__gpu_kernel void acoshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<acoshf>(NumElements, Out, X);
}
+__gpu_kernel void acoshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acoshf16>(NumElements, Out, X);
+}
+
+__gpu_kernel void acospif16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<acospif16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<asin>(NumElements, Out, X);
@@ -84,16 +99,31 @@ __gpu_kernel void asinfKernel(const float *X, float *Out,
runKernelBody<asinf>(NumElements, Out, X);
}
+__gpu_kernel void asinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<asinf16>(NumElements, Out, X);
+}
+
__gpu_kernel void asinhfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<asinhf>(NumElements, Out, X);
}
+__gpu_kernel void asinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<asinhf16>(NumElements, Out, X);
+}
+
__gpu_kernel void atanfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<atanf>(NumElements, Out, X);
}
+__gpu_kernel void atanf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<atanf16>(NumElements, Out, X);
+}
+
__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<atan2f>(NumElements, Out, X, Y);
@@ -104,6 +134,11 @@ __gpu_kernel void atanhfKernel(const float *X, float *Out,
runKernelBody<atanhf>(NumElements, Out, X);
}
+__gpu_kernel void atanhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<atanhf16>(NumElements, Out, X);
+}
+
__gpu_kernel void cbrtKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<cbrt>(NumElements, Out, X);
@@ -124,16 +159,31 @@ __gpu_kernel void cosfKernel(const float *X, float *Out,
runKernelBody<cosf>(NumElements, Out, X);
}
+__gpu_kernel void cosf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<cosf16>(NumElements, Out, X);
+}
+
__gpu_kernel void coshfKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<coshf>(NumElements, Out, X);
}
+__gpu_kernel void coshf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<coshf16>(NumElements, Out, X);
+}
+
__gpu_kernel void cospifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<cospif>(NumElements, Out, X);
}
+__gpu_kernel void cospif16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<cospif16>(NumElements, Out, X);
+}
+
__gpu_kernel void erffKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<erff>(NumElements, Out, X);
@@ -149,6 +199,11 @@ __gpu_kernel void expfKernel(const float *X, float *Out,
runKernelBody<expf>(NumElements, Out, X);
}
+__gpu_kernel void expf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<expf16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<exp10>(NumElements, Out, X);
@@ -159,6 +214,11 @@ __gpu_kernel void exp10fKernel(const float *X, float *Out,
runKernelBody<exp10f>(NumElements, Out, X);
}
+__gpu_kernel void exp10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<exp10f16>(NumElements, Out, X);
+}
+
__gpu_kernel void exp2Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<exp2>(NumElements, Out, X);
@@ -169,6 +229,11 @@ __gpu_kernel void exp2fKernel(const float *X, float *Out,
runKernelBody<exp2f>(NumElements, Out, X);
}
+__gpu_kernel void exp2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<exp2f16>(NumElements, Out, X);
+}
+
__gpu_kernel void expm1Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<expm1>(NumElements, Out, X);
@@ -179,6 +244,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out,
runKernelBody<expm1f>(NumElements, Out, X);
}
+__gpu_kernel void expm1f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<expm1f16>(NumElements, Out, X);
+}
+
__gpu_kernel void hypotKernel(const double *X, const double *Y, double *Out,
size_t NumElements) noexcept {
runKernelBody<hypot>(NumElements, Out, X, Y);
@@ -204,6 +274,11 @@ __gpu_kernel void logfKernel(const float *X, float *Out,
runKernelBody<logf>(NumElements, Out, X);
}
+__gpu_kernel void logf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<logf16>(NumElements, Out, X);
+}
+
__gpu_kernel void log10Kernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<log10>(NumElements, Out, X);
@@ -214,6 +289,11 @@ __gpu_kernel void log10fKernel(const float *X, float *Out,
runKernelBody<log10f>(NumElements, Out, X);
}
+__gpu_kernel void log10f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<log10f16>(NumElements, Out, X);
+}
+
__gpu_kernel void log1pKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<log1p>(NumElements, Out, X);
@@ -234,6 +314,11 @@ __gpu_kernel void log2fKernel(const float *X, float *Out,
runKernelBody<log2f>(NumElements, Out, X);
}
+__gpu_kernel void log2f16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<log2f16>(NumElements, Out, X);
+}
+
__gpu_kernel void powfKernel(const float *X, float *Y, float *Out,
size_t NumElements) noexcept {
runKernelBody<powf>(NumElements, Out, X, Y);
@@ -255,6 +340,11 @@ __gpu_kernel void sinfKernel(const float *X, float *Out,
runKernelBody<sinf>(NumElements, Out, X);
}
+__gpu_kernel void sinf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sinf16>(NumElements, Out, X);
+}
+
__gpu_kernel void sincosSinKernel(const double *X, double *Out,
size_t NumElements) noexcept {
runKernelBody<sincosSin>(NumElements, Out, X);
@@ -280,11 +370,21 @@ __gpu_kernel void sinhfKernel(const float *X, float *Out,
runKernelBody<sinhf>(NumElements, Out, X);
}
+__gpu_kernel void sinhf16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sinhf16>(NumElements, Out, X);
+}
+
__gpu_kernel void sinpifKernel(const float *X, float *Out,
size_t NumElements) noexcept {
runKernelBody<sinpif>(NumElements, Out, X);
}
+__gpu_kernel void sinpif16Kernel(const float16 *X, float16 *Out,
+ size_t NumElements) noexcept {
+ runKernelBody<sinpif16>(NumElements, Out, X);
+}
+
__...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155112
More information about the llvm-commits
mailing list