[libclc] f186041 - [libclc] Move sinh, cosh & tanh to the CLC library (#134063)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Apr 2 07:22:46 PDT 2025
Author: Fraser Cormack
Date: 2025-04-02T15:22:42+01:00
New Revision: f186041553f9ab2d49510c6690bb4faf96457d4a
URL: https://github.com/llvm/llvm-project/commit/f186041553f9ab2d49510c6690bb4faf96457d4a
DIFF: https://github.com/llvm/llvm-project/commit/f186041553f9ab2d49510c6690bb4faf96457d4a.diff
LOG: [libclc] Move sinh, cosh & tanh to the CLC library (#134063)
This commit also vectorizes the builtins.
Added:
libclc/clc/include/clc/math/clc_cosh.h
libclc/clc/include/clc/math/clc_sinh.h
libclc/clc/include/clc/math/clc_tanh.h
libclc/clc/lib/generic/math/clc_cosh.cl
libclc/clc/lib/generic/math/clc_cosh.inc
libclc/clc/lib/generic/math/clc_sinh.cl
libclc/clc/lib/generic/math/clc_sinh.inc
libclc/clc/lib/generic/math/clc_tanh.cl
libclc/clc/lib/generic/math/clc_tanh.inc
Modified:
libclc/clc/include/clc/math/tables.h
libclc/clc/lib/generic/SOURCES
libclc/clc/lib/generic/math/clc_tables.cl
libclc/generic/lib/math/cosh.cl
libclc/generic/lib/math/sinh.cl
libclc/generic/lib/math/tables.cl
libclc/generic/lib/math/tanh.cl
Removed:
################################################################################
diff --git a/libclc/clc/include/clc/math/clc_cosh.h b/libclc/clc/include/clc/math/clc_cosh.h
new file mode 100644
index 0000000000000..71e414ce28ac2
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_cosh.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_COSH_H__
+#define __CLC_MATH_CLC_COSH_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_cosh
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_COSH_H__
diff --git a/libclc/clc/include/clc/math/clc_sinh.h b/libclc/clc/include/clc/math/clc_sinh.h
new file mode 100644
index 0000000000000..da525b5cd0fe7
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_sinh.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_SINH_H__
+#define __CLC_MATH_CLC_SINH_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_sinh
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_SINH_H__
diff --git a/libclc/clc/include/clc/math/clc_tanh.h b/libclc/clc/include/clc/math/clc_tanh.h
new file mode 100644
index 0000000000000..972a31e248c67
--- /dev/null
+++ b/libclc/clc/include/clc/math/clc_tanh.h
@@ -0,0 +1,20 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __CLC_MATH_CLC_TANH_H__
+#define __CLC_MATH_CLC_TANH_H__
+
+#define __CLC_BODY <clc/math/unary_decl.inc>
+#define __CLC_FUNCTION __clc_tanh
+
+#include <clc/math/gentype.inc>
+
+#undef __CLC_BODY
+#undef __CLC_FUNCTION
+
+#endif // __CLC_MATH_CLC_TANH_H__
diff --git a/libclc/clc/include/clc/math/tables.h b/libclc/clc/include/clc/math/tables.h
index e06ee82d98355..fb172b0b8f221 100644
--- a/libclc/clc/include/clc/math/tables.h
+++ b/libclc/clc/include/clc/math/tables.h
@@ -62,7 +62,6 @@
TABLE_FUNCTION_DECL(float2, log2_tbl);
TABLE_FUNCTION_DECL(float2, log10_tbl);
TABLE_FUNCTION_DECL(uint4, pibits_tbl);
-TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_head);
CLC_TABLE_FUNCTION_DECL(float, log_inv_tbl_ep_tail);
@@ -74,6 +73,8 @@ CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_head);
CLC_TABLE_FUNCTION_DECL(float, exp_tbl_ep_tail);
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_head);
CLC_TABLE_FUNCTION_DECL(float, cbrt_tbl_tail);
+CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_head);
+CLC_TABLE_FUNCTION_DECL(float, sinhcosh_tbl_tail);
#ifdef cl_khr_fp64
@@ -85,8 +86,10 @@ CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_head);
CLC_TABLE_FUNCTION_DECL(double, atan_jby256_tbl_tail);
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_head);
CLC_TABLE_FUNCTION_DECL(double, two_to_jby64_ep_tbl_tail);
-TABLE_FUNCTION_DECL(double2, sinh_tbl);
-TABLE_FUNCTION_DECL(double2, cosh_tbl);
+CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_head);
+CLC_TABLE_FUNCTION_DECL(double, sinh_tbl_tail);
+CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_head);
+CLC_TABLE_FUNCTION_DECL(double, cosh_tbl_tail);
CLC_TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_head);
CLC_TABLE_FUNCTION_DECL(double, cbrt_dbl_tbl_tail);
diff --git a/libclc/clc/lib/generic/SOURCES b/libclc/clc/lib/generic/SOURCES
index e7ddad3172373..25549af552a3c 100644
--- a/libclc/clc/lib/generic/SOURCES
+++ b/libclc/clc/lib/generic/SOURCES
@@ -31,6 +31,7 @@ math/clc_atanpi.cl
math/clc_cbrt.cl
math/clc_ceil.cl
math/clc_copysign.cl
+math/clc_cosh.cl
math/clc_cospi.cl
math/clc_ep_log.cl
math/clc_exp.cl
@@ -76,10 +77,12 @@ math/clc_rootn.cl
math/clc_round.cl
math/clc_rsqrt.cl
math/clc_sincos_helpers.cl
+math/clc_sinh.cl
math/clc_sinpi.cl
math/clc_sqrt.cl
math/clc_sw_fma.cl
math/clc_tables.cl
+math/clc_tanh.cl
math/clc_tanpi.cl
math/clc_tgamma.cl
math/clc_trunc.cl
diff --git a/libclc/clc/lib/generic/math/clc_cosh.cl b/libclc/clc/lib/generic/math/clc_cosh.cl
new file mode 100644
index 0000000000000..4da78de2714e3
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_cosh.cl
@@ -0,0 +1,24 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/float/definitions.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_copysign.h>
+#include <clc/math/clc_exp.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_cosh.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_cosh.inc b/libclc/clc/lib/generic/math/clc_cosh.inc
new file mode 100644
index 0000000000000..a9fa1bb14140c
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_cosh.inc
@@ -0,0 +1,199 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
+ // After dealing with special cases the computation is split into regions as
+ // follows (cosh is even, so no sign is applied to the result):
+ //   abs(x) >= max_cosh_arg:    cosh(x) = +Inf
+ //   abs(x) >= small_threshold: cosh(x) = exp(abs(x))/2, evaluated below as
+ //     exp(abs(x) - c) plus a small correction, with c close to ln(2).
+ //   abs(x) < small_threshold:  cosh(x) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
+ //     with y0 the integer part of abs(x), dy = abs(x) - y0 (see below).
+
+ const __CLC_GENTYPE max_cosh_arg = 0x1.65a9fap+6f;
+ const __CLC_GENTYPE small_threshold = 0x1.0a2b24p+3f;
+
+ __CLC_UINTN ux = __CLC_AS_UINTN(x);
+ __CLC_GENTYPE y = __clc_fabs(x);
+ __CLC_UINTN aux = __CLC_AS_UINTN(y);
+
+ // Find the integer part y0 of y and the increment dy = y - y0. We then
+ // compute z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy), where
+ // sinh(y0) and cosh(y0) are read from the sinhcosh_tbl_head and
+ // sinhcosh_tbl_tail lookup tables respectively.
+
+ __CLC_INTN ind = __CLC_CONVERT_INTN(y);
+ ind = __CLC_CONVERT_UINTN(ind) > 36U ? 0 : ind;
+
+ __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+ __CLC_GENTYPE dy2 = dy * dy;
+
+ __CLC_GENTYPE sdy = __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(dy2,
+ __clc_mad(dy2, 0.7746188980094184251527126e-12f,
+ 0.160576793121939886190847e-9f),
+ 0.250521176994133472333666e-7f),
+ 0.275573191913636406057211e-5f),
+ 0.198412698413242405162014e-3f),
+ 0.833333333333329931873097e-2f),
+ 0.166666666666666667013899e0f);
+ sdy = __clc_mad(sdy, dy * dy2, dy);
+
+ __CLC_GENTYPE cdy = __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(dy2,
+ __clc_mad(dy2, 0.1163921388172173692062032e-10f,
+ 0.208744349831471353536305e-8f),
+ 0.275573350756016588011357e-6f),
+ 0.248015872460622433115785e-4f),
+ 0.138888888889814854814536e-2f),
+ 0.416666666666660876512776e-1f),
+ 0.500000000000000005911074e0f);
+ cdy = __clc_mad(cdy, dy2, 1.0f);
+
+ __CLC_GENTYPE sinhcoshh = USE_TABLE(sinhcosh_tbl_head, ind);
+ __CLC_GENTYPE sinhcosht = USE_TABLE(sinhcosh_tbl_tail, ind);
+ __CLC_GENTYPE z = __clc_mad(sinhcoshh, sdy, sinhcosht * cdy);
+
+ // When exp(-x) is insignificant compared to exp(x), return exp(x)/2
+ __CLC_GENTYPE t = __clc_exp(y - 0x1.62e500p-1f);
+ __CLC_GENTYPE zsmall = __clc_mad(0x1.a0210ep-18f, t, t);
+ z = y >= small_threshold ? zsmall : z;
+
+ // Corner cases
+ z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_UINTN)PINFBITPATT_SP32) : z;
+ z = aux > PINFBITPATT_SP32 ? __CLC_GENTYPE_NAN : z;
+ z = aux < 0x38800000 ? 1.0f : z;
+
+ return z;
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
+ // After dealing with special cases the computation is split into
+ // regions as follows:
+ //
+ // abs(x) >= max_cosh_arg:
+ // cosh(x) = +Inf (cosh is even, so no sign is applied)
+ //
+ // abs(x) >= small_threshold:
+ // cosh(x) = exp(abs(x))/2, evaluated below as exp(abs(x) - c)
+ // plus a small correction, with c close to ln(2).
+ //
+ // abs(x) < small_threshold:
+ // cosh(x) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy), with y0 the
+ // integer part of abs(x) and dy = abs(x) - y0 (see below).
+
+ // This is ln(2^1025) = 0x408633ce8fb9f87e
+ const __CLC_GENTYPE max_cosh_arg = 7.10475860073943977113e+02;
+
+ // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
+ const __CLC_GENTYPE small_threshold = 0x1.2b708872320e2p+4;
+
+ __CLC_GENTYPE y = __clc_fabs(x);
+
+ // In this range we find the integer part y0 of y
+ // and the increment dy = y - y0. We then compute
+ // z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
+ // where sinh(y0) and cosh(y0) are read from lookup tables.
+
+ __CLC_INTN ind = __clc_min(__CLC_CONVERT_INTN(y), 36);
+ __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+ __CLC_GENTYPE dy2 = dy * dy;
+
+ __CLC_GENTYPE sdy =
+ dy * dy2 *
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(dy2,
+ __clc_fma(dy2, 0.7746188980094184251527126e-12,
+ 0.160576793121939886190847e-9),
+ 0.250521176994133472333666e-7),
+ 0.275573191913636406057211e-5),
+ 0.198412698413242405162014e-3),
+ 0.833333333333329931873097e-2),
+ 0.166666666666666667013899e0);
+
+ __CLC_GENTYPE cdy =
+ dy2 *
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(dy2,
+ __clc_fma(dy2, 0.1163921388172173692062032e-10,
+ 0.208744349831471353536305e-8),
+ 0.275573350756016588011357e-6),
+ 0.248015872460622433115785e-4),
+ 0.138888888889814854814536e-2),
+ 0.416666666666660876512776e-1),
+ 0.500000000000000005911074e0);
+
+ // At this point sinh(dy) is approximated by dy + sdy,
+ // and cosh(dy) is approximated by 1 + cdy.
+ __CLC_GENTYPE cl = USE_TABLE(cosh_tbl_head, ind);
+ __CLC_GENTYPE ct = USE_TABLE(cosh_tbl_tail, ind);
+ __CLC_GENTYPE sl = USE_TABLE(sinh_tbl_head, ind);
+ __CLC_GENTYPE st = USE_TABLE(sinh_tbl_tail, ind);
+
+ __CLC_GENTYPE z =
+ __clc_fma(
+ sl, dy,
+ __clc_fma(sl, sdy,
+ __clc_fma(cl, cdy,
+ __clc_fma(st, dy, __clc_fma(st, sdy, ct * cdy)) +
+ ct))) +
+ cl;
+
+ // Other cases
+ z = y < 0x1.0p-28 ? 1.0 : z;
+
+ __CLC_GENTYPE t = __clc_exp(y - 0x1.62e42fefa3800p-1);
+ t = __clc_fma(t, -0x1.ef35793c76641p-45, t);
+ z = y >= small_threshold ? t : z;
+
+ z = y >= max_cosh_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;
+
+ z = __clc_isinf(x) || __clc_isnan(x) ? y : z;
+
+ return z;
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_cosh(__CLC_GENTYPE x) {
+ return __CLC_CONVERT_GENTYPE(__clc_cosh(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_sinh.cl b/libclc/clc/lib/generic/math/clc_sinh.cl
new file mode 100644
index 0000000000000..bd0b488b16047
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sinh.cl
@@ -0,0 +1,23 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_copysign.h>
+#include <clc/math/clc_exp.h>
+#include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_sinh.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_sinh.inc b/libclc/clc/lib/generic/math/clc_sinh.inc
new file mode 100644
index 0000000000000..799cc32105084
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_sinh.inc
@@ -0,0 +1,201 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
+ // After dealing with special cases the computation is split into regions:
+ //   abs(x) >= max_sinh_arg:    sinh(x) = sign(x)*Inf
+ //   abs(x) >= small_threshold: sinh(x) = sign(x)*exp(abs(x))/2, evaluated
+ //     below as exp(abs(x) - c) plus a small correction, c close to ln(2).
+ //   abs(x) < small_threshold:  z = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) with
+ //     y0 the integer part of y = abs(x); sinh(x) is then sign(x)*z.
+
+ const __CLC_GENTYPE max_sinh_arg = 0x1.65a9fap+6f;
+ const __CLC_GENTYPE small_threshold = 0x1.0a2b24p+3f;
+
+ __CLC_UINTN ux = __CLC_AS_UINTN(x);
+ __CLC_GENTYPE y = __clc_fabs(x);
+ __CLC_UINTN aux = __CLC_AS_UINTN(y);
+ __CLC_UINTN xs = ux ^ aux;
+
+ // We find the integer part y0 of y and the increment dy = y - y0. We then
+ // compute z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) where sinh(y0)
+ // and cosh(y0) come from sinhcosh_tbl_head and sinhcosh_tbl_tail.
+ __CLC_INTN ind = __CLC_CONVERT_INTN(y);
+ ind = __CLC_CONVERT_UINTN(ind) > 36U ? 0 : ind;
+
+ __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+ __CLC_GENTYPE dy2 = dy * dy;
+
+ __CLC_GENTYPE sdy = __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(dy2,
+ __clc_mad(dy2, 0.7746188980094184251527126e-12f,
+ 0.160576793121939886190847e-9f),
+ 0.250521176994133472333666e-7f),
+ 0.275573191913636406057211e-5f),
+ 0.198412698413242405162014e-3f),
+ 0.833333333333329931873097e-2f),
+ 0.166666666666666667013899e0f);
+ sdy = __clc_mad(sdy, dy * dy2, dy);
+
+ __CLC_GENTYPE cdy = __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(
+ dy2,
+ __clc_mad(dy2,
+ __clc_mad(dy2, 0.1163921388172173692062032e-10f,
+ 0.208744349831471353536305e-8f),
+ 0.275573350756016588011357e-6f),
+ 0.248015872460622433115785e-4f),
+ 0.138888888889814854814536e-2f),
+ 0.416666666666660876512776e-1f),
+ 0.500000000000000005911074e0f);
+ cdy = __clc_mad(cdy, dy2, 1.0f);
+
+ __CLC_GENTYPE sinhcoshh = USE_TABLE(sinhcosh_tbl_head, ind);
+ __CLC_GENTYPE sinhcosht = USE_TABLE(sinhcosh_tbl_tail, ind);
+ __CLC_GENTYPE z = __clc_mad(sinhcosht, sdy, sinhcoshh * cdy);
+ z = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(z));
+
+ // When y is large enough that the negative exponential is negligible,
+ // sinh(x) is approximated by sign(x)*exp(y)/2.
+ __CLC_GENTYPE t = __clc_exp(y - 0x1.62e500p-1f);
+ __CLC_GENTYPE zsmall = __clc_mad(0x1.a0210ep-18f, t, t);
+ zsmall = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(zsmall));
+ z = y >= small_threshold ? zsmall : z;
+
+ // Corner cases
+ __CLC_GENTYPE zinf = __CLC_AS_GENTYPE(PINFBITPATT_SP32 | xs);
+ z = y >= max_sinh_arg ? zinf : z;
+ z = aux > PINFBITPATT_SP32 || aux < 0x38800000U ? x : z;
+
+ return z;
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
+ // After dealing with special cases the computation is split into
+ // regions as follows:
+ //
+ // abs(x) >= max_sinh_arg:
+ // sinh(x) = sign(x)*Inf
+ //
+ // abs(x) >= small_threshold:
+ // sinh(x) = sign(x)*exp(abs(x))/2, evaluated below as
+ // exp(abs(x) - c) plus a small correction, with c close to ln(2).
+ //
+ // abs(x) < small_threshold:
+ // z = sinh(y0)cosh(dy) + cosh(y0)sinh(dy), y0 = integer part of abs(x);
+ // sinh(x) is then sign(x)*z.
+
+ // This is ln(2^1025) = 0x408633ce8fb9f87e
+ const __CLC_GENTYPE max_sinh_arg = 7.10475860073943977113e+02;
+
+ // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
+ const __CLC_GENTYPE small_threshold = 0x1.2b708872320e2p+4;
+
+ __CLC_GENTYPE y = __clc_fabs(x);
+
+ // In this range we find the integer part y0 of y
+ // and the increment dy = y - y0. We then compute
+ // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
+ // where sinh(y0) and cosh(y0) are obtained from tables
+
+ __CLC_INTN ind = __clc_min(__CLC_CONVERT_INTN(y), 36);
+ __CLC_GENTYPE dy = y - __CLC_CONVERT_GENTYPE(ind);
+ __CLC_GENTYPE dy2 = dy * dy;
+
+ __CLC_GENTYPE sdy =
+ dy * dy2 *
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(dy2,
+ __clc_fma(dy2, 0.7746188980094184251527126e-12,
+ 0.160576793121939886190847e-9),
+ 0.250521176994133472333666e-7),
+ 0.275573191913636406057211e-5),
+ 0.198412698413242405162014e-3),
+ 0.833333333333329931873097e-2),
+ 0.166666666666666667013899e0);
+
+ __CLC_GENTYPE cdy =
+ dy2 *
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(
+ dy2,
+ __clc_fma(dy2,
+ __clc_fma(dy2, 0.1163921388172173692062032e-10,
+ 0.208744349831471353536305e-8),
+ 0.275573350756016588011357e-6),
+ 0.248015872460622433115785e-4),
+ 0.138888888889814854814536e-2),
+ 0.416666666666660876512776e-1),
+ 0.500000000000000005911074e0);
+
+ // At this point sinh(dy) is approximated by dy + sdy.
+ // Shift some significant bits from dy to sdy.
+ __CLC_GENTYPE sdy1 =
+ __CLC_AS_GENTYPE(__CLC_AS_ULONGN(dy) & 0xfffffffff8000000UL);
+ __CLC_GENTYPE sdy2 = sdy + (dy - sdy1);
+
+ __CLC_GENTYPE cl = USE_TABLE(cosh_tbl_head, ind);
+ __CLC_GENTYPE ct = USE_TABLE(cosh_tbl_tail, ind);
+ __CLC_GENTYPE sl = USE_TABLE(sinh_tbl_head, ind);
+ __CLC_GENTYPE st = USE_TABLE(sinh_tbl_tail, ind);
+
+ __CLC_GENTYPE z =
+ __clc_fma(cl, sdy1,
+ __clc_fma(sl, cdy,
+ __clc_fma(cl, sdy2,
+ __clc_fma(ct, sdy1,
+ __clc_fma(st, cdy, ct * sdy2)) +
+ st))) +
+ sl;
+
+ // Other cases
+ z = (y < 0x1.0p-28) || __clc_isnan(x) || __clc_isinf(x) ? y : z;
+
+ __CLC_GENTYPE t = __clc_exp(y - 0x1.62e42fefa3800p-1);
+ t = __clc_fma(t, -0x1.ef35793c76641p-45, t);
+ z = y >= small_threshold ? t : z;
+ z = y >= max_sinh_arg ? __CLC_AS_GENTYPE((__CLC_ULONGN)PINFBITPATT_DP64) : z;
+
+ return __clc_copysign(z, x);
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_sinh(__CLC_GENTYPE x) {
+ return __CLC_CONVERT_GENTYPE(__clc_sinh(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/clc/lib/generic/math/clc_tables.cl b/libclc/clc/lib/generic/math/clc_tables.cl
index c5eb21a6d5ed7..6280413ca48ce 100644
--- a/libclc/clc/lib/generic/math/clc_tables.cl
+++ b/libclc/clc/lib/generic/math/clc_tables.cl
@@ -339,6 +339,37 @@ DECLARE_TABLE(float, CBRT_TBL_TAIL, 129) = {
CLC_TABLE_FUNCTION(float, CBRT_TBL_TAIL, cbrt_tbl_tail);
+// Tabulated sinh(i) (HEAD table) and cosh(i) (TAIL table) for i = 0,...,36.
+DECLARE_TABLE(float, SINHCOSH_TBL_HEAD, 37) = {
+ 0x0.000000p+0f, 0x1.2cd9fcp+0f, 0x1.d03cf6p+1f, 0x1.40926ep+3f,
+ 0x1.b4a380p+4f, 0x1.28d016p+6f, 0x1.936d22p+7f, 0x1.122876p+9f,
+ 0x1.749ea6p+10f, 0x1.fa7158p+11f, 0x1.5829dcp+13f, 0x1.d3c448p+14f,
+ 0x1.3de166p+16f, 0x1.b00b5ap+17f, 0x1.259ac4p+19f, 0x1.8f0ccap+20f,
+ 0x1.0f2ebep+22f, 0x1.709348p+23f, 0x1.f4f220p+24f, 0x1.546d90p+26f,
+ 0x1.ceb088p+27f, 0x1.3a6e20p+29f, 0x1.ab5adcp+30f, 0x1.226af4p+32f,
+ 0x1.8ab7fcp+33f, 0x1.0c3d3ap+35f, 0x1.6c9326p+36f, 0x1.ef8230p+37f,
+ 0x1.50bba4p+39f, 0x1.c9aae4p+40f, 0x1.370470p+42f, 0x1.a6b766p+43f,
+ 0x1.1f43fcp+45f, 0x1.866f34p+46f, 0x1.0953e2p+48f, 0x1.689e22p+49f,
+ 0x1.ea215ap+50f,
+};
+
+CLC_TABLE_FUNCTION(float, SINHCOSH_TBL_HEAD, sinhcosh_tbl_head);
+
+DECLARE_TABLE(float, SINHCOSH_TBL_TAIL, 37) = {
+ 0x1.000000p+0f, 0x1.8b0756p+0f, 0x1.e18fa0p+1f, 0x1.422a4ap+3f,
+ 0x1.b4ee86p+4f, 0x1.28d6fcp+6f, 0x1.936e68p+7f, 0x1.122894p+9f,
+ 0x1.749eaap+10f, 0x1.fa7158p+11f, 0x1.5829dep+13f, 0x1.d3c448p+14f,
+ 0x1.3de166p+16f, 0x1.b00b5ap+17f, 0x1.259ac4p+19f, 0x1.8f0ccap+20f,
+ 0x1.0f2ebep+22f, 0x1.709348p+23f, 0x1.f4f220p+24f, 0x1.546d90p+26f,
+ 0x1.ceb088p+27f, 0x1.3a6e20p+29f, 0x1.ab5adcp+30f, 0x1.226af4p+32f,
+ 0x1.8ab7fcp+33f, 0x1.0c3d3ap+35f, 0x1.6c9326p+36f, 0x1.ef8230p+37f,
+ 0x1.50bba4p+39f, 0x1.c9aae4p+40f, 0x1.370470p+42f, 0x1.a6b766p+43f,
+ 0x1.1f43fcp+45f, 0x1.866f34p+46f, 0x1.0953e2p+48f, 0x1.689e22p+49f,
+ 0x1.ea215ap+50f,
+};
+
+CLC_TABLE_FUNCTION(float, SINHCOSH_TBL_TAIL, sinhcosh_tbl_tail);
+
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
@@ -1279,4 +1310,73 @@ DECLARE_TABLE(double, CBRT_REM_TBL_TAIL, 5) = {
CLC_TABLE_FUNCTION(double, CBRT_REM_TBL_TAIL, cbrt_rem_tbl_tail);
+DECLARE_TABLE(double, SINH_TBL_HEAD, 37) = {
+ 0x0.0000000000000p+0, 0x1.2cd9fc0000000p+0, 0x1.d03cf60000000p+1,
+ 0x1.40926e0000000p+3, 0x1.b4a3800000000p+4, 0x1.28d0160000000p+6,
+ 0x1.936d228000000p+7, 0x1.1228768000000p+9, 0x1.749ea50000000p+10,
+ 0x1.fa71570000000p+11, 0x1.5829dc8000000p+13, 0x1.d3c4488000000p+14,
+ 0x1.3de1650000000p+16, 0x1.b00b590000000p+17, 0x1.259ac48000000p+19,
+ 0x1.8f0cca8000000p+20, 0x1.0f2ebd0000000p+22, 0x1.7093488000000p+23,
+ 0x1.f4f2208000000p+24, 0x1.546d8f8000000p+26, 0x1.ceb0888000000p+27,
+ 0x1.3a6e1f8000000p+29, 0x1.ab5adb8000000p+30, 0x1.226af30000000p+32,
+ 0x1.8ab7fb0000000p+33, 0x1.0c3d390000000p+35, 0x1.6c93268000000p+36,
+ 0x1.ef822f0000000p+37, 0x1.50bba30000000p+39, 0x1.c9aae40000000p+40,
+ 0x1.3704708000000p+42, 0x1.a6b7658000000p+43, 0x1.1f43fc8000000p+45,
+ 0x1.866f348000000p+46, 0x1.0953e28000000p+48, 0x1.689e220000000p+49,
+ 0x1.ea215a0000000p+50,
+};
+
+DECLARE_TABLE(double, SINH_TBL_TAIL, 37) = {
+ 0x0.0000000000000p+0, 0x1.13ae6096a0092p-26, 0x1.db70cfb79a640p-26,
+ 0x1.c2526b66dc067p-23, 0x1.b81b18647f380p-23, 0x1.bc1cdd1e1eb08p-20,
+ 0x1.d9f201534fb09p-19, 0x1.d1c064a4e9954p-18, 0x1.4eca65d06ea74p-18,
+ 0x1.0c259bcc0ecc5p-15, 0x1.b5a6647cf9016p-13, 0x1.9691adefb0870p-15,
+ 0x1.3410fc29cde38p-10, 0x1.6a31a50b6fb3cp-11, 0x1.7defc71805c40p-10,
+ 0x1.eb49fd80e0babp-6, 0x1.4fffc7bcd5920p-7, 0x1.03a93b6c63435p-3,
+ 0x1.1940bb255fd1cp-4, 0x1.ed26e14260b50p-2, 0x1.b47401fc9f2a2p+0,
+ 0x1.67bb3f55634f1p+3, 0x1.c435ff8194ddcp+2, 0x1.d8fee052ba63ap+5,
+ 0x1.51d7edccde3f6p+7, 0x1.04b1644557d1ap+8, 0x1.6a6b5ca0a9dc4p+8,
+ 0x1.fd9cc72249abap+11, 0x1.e58de693edab5p+13, 0x1.8c70158ac6363p+14,
+ 0x1.7614764f43e20p+15, 0x1.6337db36fc718p+17, 0x1.12d98b1f611e2p+19,
+ 0x1.392bc108b37ccp+19, 0x1.ce87bdc3473dcp+22, 0x1.bc8d5ae99ad14p+21,
+ 0x1.d20d76744835cp+22,
+};
+
+DECLARE_TABLE(double, COSH_TBL_HEAD, 37) = {
+ 0x1.0000000000000p+0, 0x1.8b07550000000p+0, 0x1.e18fa08000000p+1,
+ 0x1.422a490000000p+3, 0x1.b4ee858000000p+4, 0x1.28d6fc8000000p+6,
+ 0x1.936e678000000p+7, 0x1.1228948000000p+9, 0x1.749eaa8000000p+10,
+ 0x1.fa71580000000p+11, 0x1.5829dd0000000p+13, 0x1.d3c4488000000p+14,
+ 0x1.3de1650000000p+16, 0x1.b00b590000000p+17, 0x1.259ac48000000p+19,
+ 0x1.8f0cca8000000p+20, 0x1.0f2ebd0000000p+22, 0x1.7093488000000p+23,
+ 0x1.f4f2208000000p+24, 0x1.546d8f8000000p+26, 0x1.ceb0888000000p+27,
+ 0x1.3a6e1f8000000p+29, 0x1.ab5adb8000000p+30, 0x1.226af30000000p+32,
+ 0x1.8ab7fb0000000p+33, 0x1.0c3d390000000p+35, 0x1.6c93268000000p+36,
+ 0x1.ef822f0000000p+37, 0x1.50bba30000000p+39, 0x1.c9aae40000000p+40,
+ 0x1.3704708000000p+42, 0x1.a6b7658000000p+43, 0x1.1f43fc8000000p+45,
+ 0x1.866f348000000p+46, 0x1.0953e28000000p+48, 0x1.689e220000000p+49,
+ 0x1.ea215a0000000p+50,
+};
+
+DECLARE_TABLE(double, COSH_TBL_TAIL, 37) = {
+ 0x0.0000000000000p+0, 0x1.d9f5504c2bd28p-28, 0x1.7cb66f0a4c9fdp-25,
+ 0x1.f58617928e588p-23, 0x1.bc7d000c38d48p-25, 0x1.f7f9d4e329998p-21,
+ 0x1.6e6e464885269p-19, 0x1.ba3a8b946c154p-19, 0x1.3f4e76110d5a4p-18,
+ 0x1.17622515a3e2bp-15, 0x1.4dc4b528af3d0p-17, 0x1.1156278615e10p-14,
+ 0x1.35ad50ed821f5p-10, 0x1.6b61055f2935cp-11, 0x1.7e2794a601240p-10,
+ 0x1.eb4b45f6aadd3p-6, 0x1.5000b967b3698p-7, 0x1.03a940fadc092p-3,
+ 0x1.1940bf3bf874cp-4, 0x1.ed26e1a2a2110p-2, 0x1.b4740205796d6p+0,
+ 0x1.67bb3f55cb85dp+3, 0x1.c435ff81e18acp+2, 0x1.d8fee052bdea4p+5,
+ 0x1.51d7edccde926p+7, 0x1.04b1644557e0ep+8, 0x1.6a6b5ca0a9e1cp+8,
+ 0x1.fd9cc72249abep+11, 0x1.e58de693edab5p+13, 0x1.8c70158ac6364p+14,
+ 0x1.7614764f43e20p+15, 0x1.6337db36fc718p+17, 0x1.12d98b1f611e2p+19,
+ 0x1.392bc108b37ccp+19, 0x1.ce87bdc3473dcp+22, 0x1.bc8d5ae99ad14p+21,
+ 0x1.d20d76744835cp+22,
+};
+
+CLC_TABLE_FUNCTION(double, SINH_TBL_HEAD, sinh_tbl_head);
+CLC_TABLE_FUNCTION(double, SINH_TBL_TAIL, sinh_tbl_tail);
+CLC_TABLE_FUNCTION(double, COSH_TBL_HEAD, cosh_tbl_head);
+CLC_TABLE_FUNCTION(double, COSH_TBL_TAIL, cosh_tbl_tail);
+
#endif // cl_khr_fp64
diff --git a/libclc/clc/lib/generic/math/clc_tanh.cl b/libclc/clc/lib/generic/math/clc_tanh.cl
new file mode 100644
index 0000000000000..aedcb0c38d5e3
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_tanh.cl
@@ -0,0 +1,21 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <clc/clc_convert.h>
+#include <clc/internal/clc.h>
+#include <clc/math/clc_exp.h>
+#include <clc/math/clc_fma.h>
+#include <clc/math/clc_mad.h>
+#include <clc/math/math.h>
+#include <clc/math/tables.h>
+#include <clc/relational/clc_isinf.h>
+#include <clc/relational/clc_isnan.h>
+#include <clc/shared/clc_min.h>
+
+#define __CLC_BODY <clc_tanh.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/clc/lib/generic/math/clc_tanh.inc b/libclc/clc/lib/generic/math/clc_tanh.inc
new file mode 100644
index 0000000000000..a25fd58fcbeaf
--- /dev/null
+++ b/libclc/clc/lib/generic/math/clc_tanh.inc
@@ -0,0 +1,137 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#if __CLC_FPSIZE == 32
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
+ // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
+ // to the following three formulae:
+ // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
+ // 2. (1 - (2/(exp(2*x) + 1 )))
+ // 3. (exp(2*x) - 1)/(exp(2*x) + 1)
+ // but computationally, some formulae are better on some ranges.
+
+ const __CLC_GENTYPE large_threshold = 0x1.0a2b24p+3f;
+
+ __CLC_UINTN ux = __CLC_AS_UINTN(x);
+ __CLC_UINTN aux = ux & EXSIGNBIT_SP32;
+ __CLC_UINTN xs = ux ^ aux;
+
+ __CLC_GENTYPE y = __CLC_AS_GENTYPE(aux);
+ __CLC_GENTYPE y2 = y * y;
+
+ __CLC_GENTYPE a1 = __clc_mad(
+ y2, __clc_mad(y2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F),
+ -0.28192806108402678e0F);
+ __CLC_GENTYPE b1 =
+ __clc_mad(y2, 0.3427017942262751343e0F, 0.845784192581041099e0F);
+
+ __CLC_GENTYPE a2 = __clc_mad(
+ y2, __clc_mad(y2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F),
+ -0.24069858695196524e0F);
+ __CLC_GENTYPE b2 =
+ __clc_mad(y2, 0.292529068698052819e0F, 0.72209738473684982e0F);
+
+ __CLC_INTN c = y < 0.9f;
+ __CLC_GENTYPE a = c ? a1 : a2;
+ __CLC_GENTYPE b = c ? b1 : b2;
+ __CLC_GENTYPE zlo = __clc_mad(MATH_DIVIDE(a, b), y * y2, y);
+
+ __CLC_GENTYPE p = __clc_exp(2.0f * y) + 1.0f;
+ __CLC_GENTYPE zhi = 1.0F - MATH_DIVIDE(2.0F, p);
+
+ __CLC_GENTYPE z = y <= 1.0f ? zlo : zhi;
+ z = __CLC_AS_GENTYPE(xs | __CLC_AS_UINTN(z));
+
+ // Edge cases
+ __CLC_GENTYPE sone = __CLC_AS_GENTYPE(0x3f800000U | xs);
+ z = y > large_threshold ? sone : z;
+ z = aux < 0x39000000 || aux > 0x7f800000 ? x : z;
+
+ return z;
+}
+
+#elif __CLC_FPSIZE == 64
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
+ // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
+ // to the following three formulae:
+ // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
+ // 2. (1 - (2/(exp(2*x) + 1 )))
+ // 3. (exp(2*x) - 1)/(exp(2*x) + 1)
+ // but computationally, some formulae are better on some ranges.
+
+ // The point at which e^-x is insignificant compared to e^x = ln(2^27)
+ const __CLC_GENTYPE large_threshold = 0x1.2b708872320e2p+4;
+
+ __CLC_ULONGN ux = __CLC_AS_ULONGN(x);
+ __CLC_ULONGN ax = ux & ~SIGNBIT_DP64;
+ __CLC_ULONGN sx = ux ^ ax;
+ __CLC_GENTYPE y = __CLC_AS_GENTYPE(ax);
+ __CLC_GENTYPE y2 = y * y;
+
+ // y < 0.9
+ __CLC_GENTYPE znl =
+ __clc_fma(y2,
+ __clc_fma(y2,
+ __clc_fma(y2, -0.142077926378834722618091e-7,
+ -0.200047621071909498730453e-3),
+ -0.176016349003044679402273e-1),
+ -0.274030424656179760118928e0);
+
+ __CLC_GENTYPE zdl =
+ __clc_fma(y2,
+ __clc_fma(y2,
+ __clc_fma(y2, 0.2091140262529164482568557e-3,
+ 0.201562166026937652780575e-1),
+ 0.381641414288328849317962e0),
+ 0.822091273968539282568011e0);
+
+ // 0.9 <= y <= 1
+ __CLC_GENTYPE znm =
+ __clc_fma(y2,
+ __clc_fma(y2,
+ __clc_fma(y2, -0.115475878996143396378318e-7,
+ -0.165597043903549960486816e-3),
+ -0.146173047288731678404066e-1),
+ -0.227793870659088295252442e0);
+
+ __CLC_GENTYPE zdm =
+ __clc_fma(y2,
+ __clc_fma(y2,
+ __clc_fma(y2, 0.173076050126225961768710e-3,
+ 0.167358775461896562588695e-1),
+ 0.317204558977294374244770e0),
+ 0.683381611977295894959554e0);
+
+ __CLC_LONGN c = y < 0.9;
+ __CLC_GENTYPE zn = c ? znl : znm;
+ __CLC_GENTYPE zd = c ? zdl : zdm;
+ __CLC_GENTYPE z = y + y * y2 * MATH_DIVIDE(zn, zd);
+
+ // y > 1
+ __CLC_GENTYPE p = __clc_exp(2.0 * y) + 1.0;
+ __CLC_GENTYPE zg = 1.0 - 2.0 / p;
+
+ z = y > 1.0 ? zg : z;
+
+ // Other cases
+ z = y < 0x1.0p-28 || ax > PINFBITPATT_DP64 ? x : z;
+
+ z = y > large_threshold ? 1.0 : z;
+
+ return __CLC_AS_GENTYPE(sx | __CLC_AS_ULONGN(z));
+}
+
+#elif __CLC_FPSIZE == 16
+
+_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE __clc_tanh(__CLC_GENTYPE x) {
+ return __CLC_CONVERT_GENTYPE(__clc_tanh(__CLC_CONVERT_FLOATN(x)));
+}
+
+#endif
diff --git a/libclc/generic/lib/math/cosh.cl b/libclc/generic/lib/math/cosh.cl
index 6d391b4f3a71f..870c56029f338 100644
--- a/libclc/generic/lib/math/cosh.cl
+++ b/libclc/generic/lib/math/cosh.cl
@@ -7,179 +7,8 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
+#include <clc/math/clc_cosh.h>
-_CLC_OVERLOAD _CLC_DEF float cosh(float x) {
-
- // After dealing with special cases the computation is split into regions as follows.
- // abs(x) >= max_cosh_arg:
- // cosh(x) = sign(x)*Inf
- // abs(x) >= small_threshold:
- // cosh(x) = sign(x)*exp(abs(x))/2 computed using the
- // splitexp and scaleDouble functions as for exp_amd().
- // abs(x) < small_threshold:
- // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
- // cosh(x) is then z.
-
- const float max_cosh_arg = 0x1.65a9fap+6f;
- const float small_threshold = 0x1.0a2b24p+3f;
-
- uint ux = as_uint(x);
- uint aux = ux & EXSIGNBIT_SP32;
- float y = as_float(aux);
-
- // Find the integer part y0 of y and the increment dy = y - y0. We then compute
- // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
- // z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
- // where sinh(y0) and cosh(y0) are tabulated above.
-
- int ind = (int)y;
- ind = (uint)ind > 36U ? 0 : ind;
-
- float dy = y - ind;
- float dy2 = dy * dy;
-
- float sdy = mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f),
- 0.250521176994133472333666e-7f),
- 0.275573191913636406057211e-5f),
- 0.198412698413242405162014e-3f),
- 0.833333333333329931873097e-2f),
- 0.166666666666666667013899e0f);
- sdy = mad(sdy, dy*dy2, dy);
-
- float cdy = mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f),
- 0.275573350756016588011357e-6f),
- 0.248015872460622433115785e-4f),
- 0.138888888889814854814536e-2f),
- 0.416666666666660876512776e-1f),
- 0.500000000000000005911074e0f);
- cdy = mad(cdy, dy2, 1.0f);
-
- float2 tv = USE_TABLE(sinhcosh_tbl, ind);
- float z = mad(tv.s0, sdy, tv.s1 * cdy);
-
- // When exp(-x) is insignificant compared to exp(x), return exp(x)/2
- float t = exp(y - 0x1.62e500p-1f);
- float zsmall = mad(0x1.a0210ep-18f, t, t);
- z = y >= small_threshold ? zsmall : z;
-
- // Corner cases
- z = y >= max_cosh_arg ? as_float(PINFBITPATT_SP32) : z;
- z = aux > PINFBITPATT_SP32 ? as_float(QNANBITPATT_SP32) : z;
- z = aux < 0x38800000 ? 1.0f : z;
-
- return z;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cosh, float);
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double cosh(double x) {
-
- // After dealing with special cases the computation is split into
- // regions as follows:
- //
- // abs(x) >= max_cosh_arg:
- // cosh(x) = sign(x)*Inf
- //
- // abs(x) >= small_threshold:
- // cosh(x) = sign(x)*exp(abs(x))/2 computed using the
- // splitexp and scaleDouble functions as for exp_amd().
- //
- // abs(x) < small_threshold:
- // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
- // cosh(x) is then sign(x)*z.
-
- // This is ln(2^1025)
- const double max_cosh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e
-
- // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
- const double small_threshold = 0x1.2b708872320e2p+4;
-
- double y = fabs(x);
-
- // In this range we find the integer part y0 of y
- // and the increment dy = y - y0. We then compute
- // z = cosh(y) = cosh(y0)cosh(dy) + sinh(y0)sinh(dy)
- // where sinh(y0) and cosh(y0) are tabulated above.
-
- int ind = min((int)y, 36);
- double dy = y - ind;
- double dy2 = dy * dy;
-
- double sdy = dy * dy2 *
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),
- 0.250521176994133472333666e-7),
- 0.275573191913636406057211e-5),
- 0.198412698413242405162014e-3),
- 0.833333333333329931873097e-2),
- 0.166666666666666667013899e0);
-
- double cdy = dy2 * fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),
- 0.275573350756016588011357e-6),
- 0.248015872460622433115785e-4),
- 0.138888888889814854814536e-2),
- 0.416666666666660876512776e-1),
- 0.500000000000000005911074e0);
-
- // At this point sinh(dy) is approximated by dy + sdy,
- // and cosh(dy) is approximated by 1 + cdy.
- double2 tv = USE_TABLE(cosh_tbl, ind);
- double cl = tv.s0;
- double ct = tv.s1;
- tv = USE_TABLE(sinh_tbl, ind);
- double sl = tv.s0;
- double st = tv.s1;
-
- double z = fma(sl, dy, fma(sl, sdy, fma(cl, cdy, fma(st, dy, fma(st, sdy, ct*cdy)) + ct))) + cl;
-
- // Other cases
- z = y < 0x1.0p-28 ? 1.0 : z;
-
- double t = exp(y - 0x1.62e42fefa3800p-1);
- t = fma(t, -0x1.ef35793c76641p-45, t);
- z = y >= small_threshold ? t : z;
-
- z = y >= max_cosh_arg ? as_double(PINFBITPATT_DP64) : z;
-
- z = isinf(x) | isnan(x) ? y : z;
-
- return z;
-
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cosh, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(cosh)
-
-#endif
+#define FUNCTION cosh
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/sinh.cl b/libclc/generic/lib/math/sinh.cl
index a889116bd3e6e..305f9bf647ff7 100644
--- a/libclc/generic/lib/math/sinh.cl
+++ b/libclc/generic/lib/math/sinh.cl
@@ -7,178 +7,8 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
-#include <clc/math/tables.h>
+#include <clc/math/clc_sinh.h>
-_CLC_OVERLOAD _CLC_DEF float sinh(float x)
-{
- // After dealing with special cases the computation is split into regions as follows.
- // abs(x) >= max_sinh_arg:
- // sinh(x) = sign(x)*Inf
- // abs(x) >= small_threshold:
- // sinh(x) = sign(x)*exp(abs(x))/2 computed using the splitexp and scaleDouble functions as for exp_amd().
- // abs(x) < small_threshold:
- // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
- // sinh(x) is then sign(x)*z.
-
- const float max_sinh_arg = 0x1.65a9fap+6f;
- const float small_threshold = 0x1.0a2b24p+3f;
-
- uint ux = as_uint(x);
- uint aux = ux & EXSIGNBIT_SP32;
- uint xs = ux ^ aux;
- float y = as_float(aux);
-
- // We find the integer part y0 of y and the increment dy = y - y0. We then compute
- // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
- // where sinh(y0) and cosh(y0) are tabulated above.
- int ind = (int) y;
- ind = (uint)ind > 36U ? 0 : ind;
-
- float dy = y - ind;
- float dy2 = dy * dy;
-
- float sdy = mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f),
- 0.250521176994133472333666e-7f),
- 0.275573191913636406057211e-5f),
- 0.198412698413242405162014e-3f),
- 0.833333333333329931873097e-2f),
- 0.166666666666666667013899e0f);
- sdy = mad(sdy, dy*dy2, dy);
-
- float cdy = mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2,
- mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f),
- 0.275573350756016588011357e-6f),
- 0.248015872460622433115785e-4f),
- 0.138888888889814854814536e-2f),
- 0.416666666666660876512776e-1f),
- 0.500000000000000005911074e0f);
- cdy = mad(cdy, dy2, 1.0f);
-
- float2 tv = USE_TABLE(sinhcosh_tbl, ind);
- float z = mad(tv.s1, sdy, tv.s0 * cdy);
- z = as_float(xs | as_uint(z));
-
- // When y is large enough so that the negative exponential is negligible,
- // so sinh(y) is approximated by sign(x)*exp(y)/2.
- float t = exp(y - 0x1.62e500p-1f);
- float zsmall = mad(0x1.a0210ep-18f, t, t);
- zsmall = as_float(xs | as_uint(zsmall));
- z = y >= small_threshold ? zsmall : z;
-
- // Corner cases
- float zinf = as_float(PINFBITPATT_SP32 | xs);
- z = y >= max_sinh_arg ? zinf : z;
- z = aux > PINFBITPATT_SP32 | aux < 0x38800000U ? x : z;
-
- return z;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinh, float);
-
-#ifdef cl_khr_fp64
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double sinh(double x)
-{
- // After dealing with special cases the computation is split into
- // regions as follows:
- //
- // abs(x) >= max_sinh_arg:
- // sinh(x) = sign(x)*Inf
- //
- // abs(x) >= small_threshold:
- // sinh(x) = sign(x)*exp(abs(x))/2 computed using the
- // splitexp and scaleDouble functions as for exp_amd().
- //
- // abs(x) < small_threshold:
- // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))
- // sinh(x) is then sign(x)*z.
-
- const double max_sinh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e
-
- // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)
- const double small_threshold = 0x1.2b708872320e2p+4;
-
- double y = fabs(x);
-
- // In this range we find the integer part y0 of y
- // and the increment dy = y - y0. We then compute
- // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)
- // where sinh(y0) and cosh(y0) are obtained from tables
-
- int ind = min((int)y, 36);
- double dy = y - ind;
- double dy2 = dy * dy;
-
- double sdy = dy * dy2 *
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),
- 0.250521176994133472333666e-7),
- 0.275573191913636406057211e-5),
- 0.198412698413242405162014e-3),
- 0.833333333333329931873097e-2),
- 0.166666666666666667013899e0);
-
- double cdy = dy2 * fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2,
- fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),
- 0.275573350756016588011357e-6),
- 0.248015872460622433115785e-4),
- 0.138888888889814854814536e-2),
- 0.416666666666660876512776e-1),
- 0.500000000000000005911074e0);
-
- // At this point sinh(dy) is approximated by dy + sdy.
- // Shift some significant bits from dy to sdy.
- double sdy1 = as_double(as_ulong(dy) & 0xfffffffff8000000UL);
- double sdy2 = sdy + (dy - sdy1);
-
- double2 tv = USE_TABLE(cosh_tbl, ind);
- double cl = tv.s0;
- double ct = tv.s1;
- tv = USE_TABLE(sinh_tbl, ind);
- double sl = tv.s0;
- double st = tv.s1;
-
- double z = fma(cl, sdy1, fma(sl, cdy, fma(cl, sdy2, fma(ct, sdy1, fma(st, cdy, ct*sdy2)) + st))) + sl;
-
- // Other cases
- z = (y < 0x1.0p-28) | isnan(x) | isinf(x) ? y : z;
-
- double t = exp(y - 0x1.62e42fefa3800p-1);
- t = fma(t, -0x1.ef35793c76641p-45, t);
- z = y >= small_threshold ? t : z;
- z = y >= max_sinh_arg ? as_double(PINFBITPATT_DP64) : z;
-
- return copysign(z, x);
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)
-
-#endif
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(sinh)
-
-#endif
+#define FUNCTION sinh
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
diff --git a/libclc/generic/lib/math/tables.cl b/libclc/generic/lib/math/tables.cl
index 16473caad7f23..3997b32a79043 100644
--- a/libclc/generic/lib/math/tables.cl
+++ b/libclc/generic/lib/math/tables.cl
@@ -289,139 +289,9 @@ DECLARE_TABLE(uchar, PIBITS_TBL, ) = {
230, 139, 2, 0, 0, 0, 0, 0, 0, 0
};
-// Tabulated values of sinh(i) and cosh(i) for i = 0,...,36.
-DECLARE_TABLE(float2, SINHCOSH_TBL, 37) = {
- (float2)(0x0.000000p+0f, 0x1.000000p+0f),
- (float2)(0x1.2cd9fcp+0f, 0x1.8b0756p+0f),
- (float2)(0x1.d03cf6p+1f, 0x1.e18fa0p+1f),
- (float2)(0x1.40926ep+3f, 0x1.422a4ap+3f),
- (float2)(0x1.b4a380p+4f, 0x1.b4ee86p+4f),
- (float2)(0x1.28d016p+6f, 0x1.28d6fcp+6f),
- (float2)(0x1.936d22p+7f, 0x1.936e68p+7f),
- (float2)(0x1.122876p+9f, 0x1.122894p+9f),
- (float2)(0x1.749ea6p+10f, 0x1.749eaap+10f),
- (float2)(0x1.fa7158p+11f, 0x1.fa7158p+11f),
- (float2)(0x1.5829dcp+13f, 0x1.5829dep+13f),
- (float2)(0x1.d3c448p+14f, 0x1.d3c448p+14f),
- (float2)(0x1.3de166p+16f, 0x1.3de166p+16f),
- (float2)(0x1.b00b5ap+17f, 0x1.b00b5ap+17f),
- (float2)(0x1.259ac4p+19f, 0x1.259ac4p+19f),
- (float2)(0x1.8f0ccap+20f, 0x1.8f0ccap+20f),
- (float2)(0x1.0f2ebep+22f, 0x1.0f2ebep+22f),
- (float2)(0x1.709348p+23f, 0x1.709348p+23f),
- (float2)(0x1.f4f220p+24f, 0x1.f4f220p+24f),
- (float2)(0x1.546d90p+26f, 0x1.546d90p+26f),
- (float2)(0x1.ceb088p+27f, 0x1.ceb088p+27f),
- (float2)(0x1.3a6e20p+29f, 0x1.3a6e20p+29f),
- (float2)(0x1.ab5adcp+30f, 0x1.ab5adcp+30f),
- (float2)(0x1.226af4p+32f, 0x1.226af4p+32f),
- (float2)(0x1.8ab7fcp+33f, 0x1.8ab7fcp+33f),
- (float2)(0x1.0c3d3ap+35f, 0x1.0c3d3ap+35f),
- (float2)(0x1.6c9326p+36f, 0x1.6c9326p+36f),
- (float2)(0x1.ef8230p+37f, 0x1.ef8230p+37f),
- (float2)(0x1.50bba4p+39f, 0x1.50bba4p+39f),
- (float2)(0x1.c9aae4p+40f, 0x1.c9aae4p+40f),
- (float2)(0x1.370470p+42f, 0x1.370470p+42f),
- (float2)(0x1.a6b766p+43f, 0x1.a6b766p+43f),
- (float2)(0x1.1f43fcp+45f, 0x1.1f43fcp+45f),
- (float2)(0x1.866f34p+46f, 0x1.866f34p+46f),
- (float2)(0x1.0953e2p+48f, 0x1.0953e2p+48f),
- (float2)(0x1.689e22p+49f, 0x1.689e22p+49f),
- (float2)(0x1.ea215ap+50f, 0x1.ea215ap+50f)
-};
-
TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl);
TABLE_FUNCTION(float2, LOG10_TBL, log10_tbl);
uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
return *(__constant uint4 *)(PIBITS_TBL + idx);
}
-
-TABLE_FUNCTION(float2, SINHCOSH_TBL, sinhcosh_tbl);
-
-#ifdef cl_khr_fp64
-
-DECLARE_TABLE(double2, SINH_TBL, 37) = {
- (double2)(0x0.0000000000000p+0, 0x0.0000000000000p+0),
- (double2)(0x1.2cd9fc0000000p+0, 0x1.13ae6096a0092p-26),
- (double2)(0x1.d03cf60000000p+1, 0x1.db70cfb79a640p-26),
- (double2)(0x1.40926e0000000p+3, 0x1.c2526b66dc067p-23),
- (double2)(0x1.b4a3800000000p+4, 0x1.b81b18647f380p-23),
- (double2)(0x1.28d0160000000p+6, 0x1.bc1cdd1e1eb08p-20),
- (double2)(0x1.936d228000000p+7, 0x1.d9f201534fb09p-19),
- (double2)(0x1.1228768000000p+9, 0x1.d1c064a4e9954p-18),
- (double2)(0x1.749ea50000000p+10, 0x1.4eca65d06ea74p-18),
- (double2)(0x1.fa71570000000p+11, 0x1.0c259bcc0ecc5p-15),
- (double2)(0x1.5829dc8000000p+13, 0x1.b5a6647cf9016p-13),
- (double2)(0x1.d3c4488000000p+14, 0x1.9691adefb0870p-15),
- (double2)(0x1.3de1650000000p+16, 0x1.3410fc29cde38p-10),
- (double2)(0x1.b00b590000000p+17, 0x1.6a31a50b6fb3cp-11),
- (double2)(0x1.259ac48000000p+19, 0x1.7defc71805c40p-10),
- (double2)(0x1.8f0cca8000000p+20, 0x1.eb49fd80e0babp-6),
- (double2)(0x1.0f2ebd0000000p+22, 0x1.4fffc7bcd5920p-7),
- (double2)(0x1.7093488000000p+23, 0x1.03a93b6c63435p-3),
- (double2)(0x1.f4f2208000000p+24, 0x1.1940bb255fd1cp-4),
- (double2)(0x1.546d8f8000000p+26, 0x1.ed26e14260b50p-2),
- (double2)(0x1.ceb0888000000p+27, 0x1.b47401fc9f2a2p+0),
- (double2)(0x1.3a6e1f8000000p+29, 0x1.67bb3f55634f1p+3),
- (double2)(0x1.ab5adb8000000p+30, 0x1.c435ff8194ddcp+2),
- (double2)(0x1.226af30000000p+32, 0x1.d8fee052ba63ap+5),
- (double2)(0x1.8ab7fb0000000p+33, 0x1.51d7edccde3f6p+7),
- (double2)(0x1.0c3d390000000p+35, 0x1.04b1644557d1ap+8),
- (double2)(0x1.6c93268000000p+36, 0x1.6a6b5ca0a9dc4p+8),
- (double2)(0x1.ef822f0000000p+37, 0x1.fd9cc72249abap+11),
- (double2)(0x1.50bba30000000p+39, 0x1.e58de693edab5p+13),
- (double2)(0x1.c9aae40000000p+40, 0x1.8c70158ac6363p+14),
- (double2)(0x1.3704708000000p+42, 0x1.7614764f43e20p+15),
- (double2)(0x1.a6b7658000000p+43, 0x1.6337db36fc718p+17),
- (double2)(0x1.1f43fc8000000p+45, 0x1.12d98b1f611e2p+19),
- (double2)(0x1.866f348000000p+46, 0x1.392bc108b37ccp+19),
- (double2)(0x1.0953e28000000p+48, 0x1.ce87bdc3473dcp+22),
- (double2)(0x1.689e220000000p+49, 0x1.bc8d5ae99ad14p+21),
- (double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22),
-};
-
-DECLARE_TABLE(double2, COSH_TBL, 37) = {
- (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
- (double2)(0x1.8b07550000000p+0, 0x1.d9f5504c2bd28p-28),
- (double2)(0x1.e18fa08000000p+1, 0x1.7cb66f0a4c9fdp-25),
- (double2)(0x1.422a490000000p+3, 0x1.f58617928e588p-23),
- (double2)(0x1.b4ee858000000p+4, 0x1.bc7d000c38d48p-25),
- (double2)(0x1.28d6fc8000000p+6, 0x1.f7f9d4e329998p-21),
- (double2)(0x1.936e678000000p+7, 0x1.6e6e464885269p-19),
- (double2)(0x1.1228948000000p+9, 0x1.ba3a8b946c154p-19),
- (double2)(0x1.749eaa8000000p+10, 0x1.3f4e76110d5a4p-18),
- (double2)(0x1.fa71580000000p+11, 0x1.17622515a3e2bp-15),
- (double2)(0x1.5829dd0000000p+13, 0x1.4dc4b528af3d0p-17),
- (double2)(0x1.d3c4488000000p+14, 0x1.1156278615e10p-14),
- (double2)(0x1.3de1650000000p+16, 0x1.35ad50ed821f5p-10),
- (double2)(0x1.b00b590000000p+17, 0x1.6b61055f2935cp-11),
- (double2)(0x1.259ac48000000p+19, 0x1.7e2794a601240p-10),
- (double2)(0x1.8f0cca8000000p+20, 0x1.eb4b45f6aadd3p-6),
- (double2)(0x1.0f2ebd0000000p+22, 0x1.5000b967b3698p-7),
- (double2)(0x1.7093488000000p+23, 0x1.03a940fadc092p-3),
- (double2)(0x1.f4f2208000000p+24, 0x1.1940bf3bf874cp-4),
- (double2)(0x1.546d8f8000000p+26, 0x1.ed26e1a2a2110p-2),
- (double2)(0x1.ceb0888000000p+27, 0x1.b4740205796d6p+0),
- (double2)(0x1.3a6e1f8000000p+29, 0x1.67bb3f55cb85dp+3),
- (double2)(0x1.ab5adb8000000p+30, 0x1.c435ff81e18acp+2),
- (double2)(0x1.226af30000000p+32, 0x1.d8fee052bdea4p+5),
- (double2)(0x1.8ab7fb0000000p+33, 0x1.51d7edccde926p+7),
- (double2)(0x1.0c3d390000000p+35, 0x1.04b1644557e0ep+8),
- (double2)(0x1.6c93268000000p+36, 0x1.6a6b5ca0a9e1cp+8),
- (double2)(0x1.ef822f0000000p+37, 0x1.fd9cc72249abep+11),
- (double2)(0x1.50bba30000000p+39, 0x1.e58de693edab5p+13),
- (double2)(0x1.c9aae40000000p+40, 0x1.8c70158ac6364p+14),
- (double2)(0x1.3704708000000p+42, 0x1.7614764f43e20p+15),
- (double2)(0x1.a6b7658000000p+43, 0x1.6337db36fc718p+17),
- (double2)(0x1.1f43fc8000000p+45, 0x1.12d98b1f611e2p+19),
- (double2)(0x1.866f348000000p+46, 0x1.392bc108b37ccp+19),
- (double2)(0x1.0953e28000000p+48, 0x1.ce87bdc3473dcp+22),
- (double2)(0x1.689e220000000p+49, 0x1.bc8d5ae99ad14p+21),
- (double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22)
-};
-
-TABLE_FUNCTION(double2, SINH_TBL, sinh_tbl);
-TABLE_FUNCTION(double2, COSH_TBL, cosh_tbl);
-
-#endif // cl_khr_fp64
diff --git a/libclc/generic/lib/math/tanh.cl b/libclc/generic/lib/math/tanh.cl
index 707754a13ea75..f576910f16deb 100644
--- a/libclc/generic/lib/math/tanh.cl
+++ b/libclc/generic/lib/math/tanh.cl
@@ -7,133 +7,8 @@
//===----------------------------------------------------------------------===//
#include <clc/clc.h>
-#include <clc/clcmacro.h>
-#include <clc/math/math.h>
+#include <clc/math/clc_tanh.h>
-_CLC_OVERLOAD _CLC_DEF float tanh(float x)
-{
- // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
- // to the following three formulae:
- // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
- // 2. (1 - (2/(exp(2*x) + 1 )))
- // 3. (exp(2*x) - 1)/(exp(2*x) + 1)
- // but computationally, some formulae are better on some ranges.
-
- const float large_threshold = 0x1.0a2b24p+3f;
-
- uint ux = as_uint(x);
- uint aux = ux & EXSIGNBIT_SP32;
- uint xs = ux ^ aux;
-
- float y = as_float(aux);
- float y2 = y*y;
-
- float a1 = mad(y2,
- mad(y2, 0.4891631088530669873e-4F, -0.14628356048797849e-2F),
- -0.28192806108402678e0F);
- float b1 = mad(y2, 0.3427017942262751343e0F, 0.845784192581041099e0F);
-
- float a2 = mad(y2,
- mad(y2, 0.3827534993599483396e-4F, -0.12325644183611929e-2F),
- -0.24069858695196524e0F);
- float b2 = mad(y2, 0.292529068698052819e0F, 0.72209738473684982e0F);
-
- int c = y < 0.9f;
- float a = c ? a1 : a2;
- float b = c ? b1 : b2;
- float zlo = mad(MATH_DIVIDE(a, b), y*y2, y);
-
- float p = exp(2.0f * y) + 1.0f;
- float zhi = 1.0F - MATH_DIVIDE(2.0F, p);
-
- float z = y <= 1.0f ? zlo : zhi;
- z = as_float(xs | as_uint(z));
-
- // Edge cases
- float sone = as_float(0x3f800000U | xs);
- z = y > large_threshold ? sone : z;
- z = aux < 0x39000000 | aux > 0x7f800000 ? x : z;
-
- return z;
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, tanh, float);
-
-#ifdef cl_khr_fp64
-
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
-_CLC_OVERLOAD _CLC_DEF double tanh(double x)
-{
- // The definition of tanh(x) is sinh(x)/cosh(x), which is also equivalent
- // to the following three formulae:
- // 1. (exp(x) - exp(-x))/(exp(x) + exp(-x))
- // 2. (1 - (2/(exp(2*x) + 1 )))
- // 3. (exp(2*x) - 1)/(exp(2*x) + 1)
- // but computationally, some formulae are better on some ranges.
-
- // The point at which e^-x is insignificant compared to e^x = ln(2^27)
- const double large_threshold = 0x1.2b708872320e2p+4;
-
- ulong ux = as_ulong(x);
- ulong ax = ux & ~SIGNBIT_DP64;
- ulong sx = ux ^ ax;
- double y = as_double(ax);
- double y2 = y * y;
-
- // y < 0.9
- double znl = fma(y2,
- fma(y2,
- fma(y2, -0.142077926378834722618091e-7, -0.200047621071909498730453e-3),
- -0.176016349003044679402273e-1),
- -0.274030424656179760118928e0);
-
- double zdl = fma(y2,
- fma(y2,
- fma(y2, 0.2091140262529164482568557e-3, 0.201562166026937652780575e-1),
- 0.381641414288328849317962e0),
- 0.822091273968539282568011e0);
-
- // 0.9 <= y <= 1
- double znm = fma(y2,
- fma(y2,
- fma(y2, -0.115475878996143396378318e-7, -0.165597043903549960486816e-3),
- -0.146173047288731678404066e-1),
- -0.227793870659088295252442e0);
-
- double zdm = fma(y2,
- fma(y2,
- fma(y2, 0.173076050126225961768710e-3, 0.167358775461896562588695e-1),
- 0.317204558977294374244770e0),
- 0.683381611977295894959554e0);
-
- int c = y < 0.9;
- double zn = c ? znl : znm;
- double zd = c ? zdl : zdm;
- double z = y + y*y2 * MATH_DIVIDE(zn, zd);
-
- // y > 1
- double p = exp(2.0 * y) + 1.0;
- double zg = 1.0 - 2.0 / p;
-
- z = y > 1.0 ? zg : z;
-
- // Other cases
- z = y < 0x1.0p-28 | ax > PINFBITPATT_DP64 ? x : z;
-
- z = y > large_threshold ? 1.0 : z;
-
- return as_double(sx | as_ulong(z));
-}
-
-_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, tanh, double);
-
-#endif // cl_khr_fp64
-
-#ifdef cl_khr_fp16
-
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-
-_CLC_DEFINE_UNARY_BUILTIN_FP16(tanh)
-
-#endif
+#define FUNCTION tanh
+#define __CLC_BODY <clc/shared/unary_def.inc>
+#include <clc/math/gentype.inc>
More information about the cfe-commits
mailing list