<html><head><meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class=""><br class=""><div><blockquote type="cite" class=""><div class="">On Feb 24, 2017, at 12:27, Jan Vesely via Libclc-dev &lt;<a href="mailto:libclc-dev@lists.llvm.org" class="">libclc-dev@lists.llvm.org</a>&gt; wrote:</div><br class="Apple-interchange-newline"><div class=""><div class="">mostly copied from amd_builtins<br class="">---<br class="">passes sinh piglit on carrizo and topaz<br class=""><br class=""> generic/include/clc/clc.h | 1 +<br class=""> generic/include/clc/math/sinh.h | 24 +++++<br class=""> generic/include/clc/math/sinh.inc | 23 +++++<br class=""> generic/lib/SOURCES | 1 +<br class=""> generic/lib/math/sinh.cl | 191 ++++++++++++++++++++++++++++++++++++++<br class=""> 5 files changed, 240 insertions(+)<br class=""> create mode 100644 generic/include/clc/math/sinh.h<br class=""> create mode 100644 generic/include/clc/math/sinh.inc<br class=""> create mode 100644 generic/lib/math/sinh.cl<br class=""><br class="">diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h<br class="">index da526b2..bf4b01d 100644<br class="">--- a/generic/include/clc/clc.h<br class="">+++ b/generic/include/clc/clc.h<br class="">@@ -86,6 +86,7 @@<br class=""> #include &lt;clc/math/round.h&gt;<br class=""> #include &lt;clc/math/sin.h&gt;<br class=""> #include &lt;clc/math/sincos.h&gt;<br class="">+#include &lt;clc/math/sinh.h&gt;<br class=""> #include &lt;clc/math/sinpi.h&gt;<br class=""> #include &lt;clc/math/sqrt.h&gt;<br class=""> #include &lt;clc/math/tan.h&gt;<br class="">diff --git a/generic/include/clc/math/sinh.h b/generic/include/clc/math/sinh.h<br class="">new file mode 100644<br class="">index 0000000..a9087dc<br class="">--- /dev/null<br class="">+++ b/generic/include/clc/math/sinh.h<br class="">@@ -0,0 +1,24 @@<br class="">+/*<br class="">+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.<br 
class="">+ *<br class="">+ * Permission is hereby granted, free of charge, to any person obtaining a copy<br class="">+ * of this software and associated documentation files (the "Software"), to deal<br class="">+ * in the Software without restriction, including without limitation the rights<br class="">+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br class="">+ * copies of the Software, and to permit persons to whom the Software is<br class="">+ * furnished to do so, subject to the following conditions:<br class="">+ *<br class="">+ * The above copyright notice and this permission notice shall be included in<br class="">+ * all copies or substantial portions of the Software.<br class="">+ *<br class="">+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br class="">+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br class="">+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE<br class="">+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br class="">+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br class="">+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br class="">+ * THE SOFTWARE.<br class="">+ */<br class="">+<br class="">+#define __CLC_BODY &lt;clc/math/sinh.inc&gt;<br class="">+#include &lt;clc/math/gentype.inc&gt;<br class="">diff --git a/generic/include/clc/math/sinh.inc b/generic/include/clc/math/sinh.inc<br class="">new file mode 100644<br class="">index 0000000..88bb255<br class="">--- /dev/null<br class="">+++ b/generic/include/clc/math/sinh.inc<br class="">@@ -0,0 +1,23 @@<br class="">+/*<br class="">+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.<br class="">+ *<br class="">+ * Permission is hereby granted, free of charge, to any person obtaining a copy<br class="">+ * of this software and associated documentation files (the "Software"), to deal<br 
class="">+ * in the Software without restriction, including without limitation the rights<br class="">+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br class="">+ * copies of the Software, and to permit persons to whom the Software is<br class="">+ * furnished to do so, subject to the following conditions:<br class="">+ *<br class="">+ * The above copyright notice and this permission notice shall be included in<br class="">+ * all copies or substantial portions of the Software.<br class="">+ *<br class="">+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br class="">+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br class="">+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE<br class="">+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br class="">+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br class="">+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br class="">+ * THE SOFTWARE.<br class="">+ */<br class="">+<br class="">+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sinh(__CLC_GENTYPE x);<br class="">diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES<br class="">index 517daba..9e0157b 100644<br class="">--- a/generic/lib/SOURCES<br class="">+++ b/generic/lib/SOURCES<br class="">@@ -116,6 +116,7 @@ math/pown.cl<br class=""> math/sin.cl<br class=""> math/sincos.cl<br class=""> math/sincos_helpers.cl<br class="">+math/sinh.cl<br class=""> math/sinpi.cl<br class=""> math/clc_sqrt.cl<br class=""> math/sqrt.cl<br class="">diff --git a/generic/lib/math/sinh.cl b/generic/lib/math/sinh.cl<br class="">new file mode 100644<br class="">index 0000000..9159b89<br class="">--- /dev/null<br class="">+++ b/generic/lib/math/sinh.cl<br class="">@@ -0,0 +1,191 @@<br class="">+/*<br class="">+ * Copyright (c) 2014 Advanced Micro Devices, Inc.<br class="">+ *<br class="">+ * 
Permission is hereby granted, free of charge, to any person obtaining a copy<br class="">+ * of this software and associated documentation files (the "Software"), to deal<br class="">+ * in the Software without restriction, including without limitation the rights<br class="">+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br class="">+ * copies of the Software, and to permit persons to whom the Software is<br class="">+ * furnished to do so, subject to the following conditions:<br class="">+ *<br class="">+ * The above copyright notice and this permission notice shall be included in<br class="">+ * all copies or substantial portions of the Software.<br class="">+ *<br class="">+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br class="">+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br class="">+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE<br class="">+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br class="">+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br class="">+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br class="">+ * THE SOFTWARE.<br class="">+ */<br class="">+<br class="">+#include &lt;clc/clc.h&gt;<br class="">+<br class="">+#include "math.h"<br class="">+#include "tables.h"<br class="">+#include "../clcmacro.h"<br class="">+<br class="">+_CLC_OVERLOAD _CLC_DEF float sinh(float x)<br class="">+{<br class="">+ // After dealing with special cases the computation is split into regions as follows.<br class="">+ // abs(x) >= max_sinh_arg:<br class="">+ // sinh(x) = sign(x)*Inf<br class="">+ // abs(x) >= small_threshold:<br class="">+ // sinh(x) = sign(x)*exp(abs(x))/2 computed using the splitexp and scaleDouble functions as for exp_amd().<br class="">+ // abs(x) < small_threshold:<br class="">+ // compute p = exp(y) - 1 and then z = 
0.5*(p+(p/(p+1.0)))<br class="">+ // sinh(x) is then sign(x)*z.<br class="">+<br class="">+ const float max_sinh_arg = 0x1.65a9fap+6f;<br class="">+ const float small_threshold = 0x1.0a2b24p+3f;<br class="">+<br class="">+ uint ux = as_uint(x);<br class="">+ uint aux = ux & EXSIGNBIT_SP32;<br class="">+ uint xs = ux ^ aux;<br class="">+ float y = as_float(aux);<br class="">+<br class="">+ // We find the integer part y0 of y and the increment dy = y - y0. We then compute<br class="">+ // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)<br class="">+ // where sinh(y0) and cosh(y0) are tabulated above.<br class="">+ int ind = (int) y;<br class="">+ ind = (uint)ind > 36U ? 0 : ind;<br class="">+<br class="">+ float dy = y - ind;<br class="">+ float dy2 = dy * dy;<br class="">+<br class="">+ float sdy = mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f),<br class="">+ 0.250521176994133472333666e-7f),<br class="">+ 0.275573191913636406057211e-5f),<br class="">+ 0.198412698413242405162014e-3f),<br class="">+ 0.833333333333329931873097e-2f),<br class="">+ 0.166666666666666667013899e0f);<br class="">+ sdy = mad(sdy, dy*dy2, dy);<br class="">+<br class="">+ float cdy = mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2,<br class="">+ mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f),<br class="">+ 0.275573350756016588011357e-6f),<br class="">+ 0.248015872460622433115785e-4f),<br class="">+ 0.138888888889814854814536e-2f),<br class="">+ 0.416666666666660876512776e-1f),<br class="">+ 0.500000000000000005911074e0f);<br class="">+ cdy = mad(cdy, dy2, 1.0f);<br class="">+<br class="">+ float2 tv = USE_TABLE(sinhcosh_tbl, ind);<br class="">+ float z = mad(tv.s1, sdy, tv.s0 * cdy);<br class="">+ z = as_float(xs | as_uint(z));<br class="">+<br class="">+ // When y is large 
enough so that the negative exponential is negligible,<br class="">+ // so sinh(y) is approximated by sign(x)*exp(y)/2.<br class="">+ float t = exp(y - 0x1.62e500p-1f);<br class="">+ float zsmall = mad(0x1.a0210ep-18f, t, t);<br class="">+ zsmall = as_float(xs | as_uint(zsmall));<br class="">+ z = y >= small_threshold ? zsmall : z;<br class="">+<br class="">+ // Corner cases<br class="">+ float zinf = as_float(PINFBITPATT_SP32 | xs);<br class="">+ z = y >= max_sinh_arg ? zinf : z;<br class="">+ z = aux > PINFBITPATT_SP32 | aux < 0x38800000U ? x : z;<br class="">+<br class="">+ return z;<br class="">+}<br class="">+<br class="">+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinh, float);<br class="">+<br class="">+#ifdef cl_khr_fp64<br class="">+#pragma OPENCL EXTENSION cl_khr_fp64 : enable<br class="">+<br class="">+_CLC_OVERLOAD _CLC_DEF double sinh(double x)<br class="">+{<br class="">+ // After dealing with special cases the computation is split into<br class="">+ // regions as follows:<br class="">+ //<br class="">+ // abs(x) >= max_sinh_arg:<br class="">+ // sinh(x) = sign(x)*Inf<br class="">+ //<br class="">+ // abs(x) >= small_threshold:<br class="">+ // sinh(x) = sign(x)*exp(abs(x))/2 computed using the<br class="">+ // splitexp and scaleDouble functions as for exp_amd().<br class="">+ //<br class="">+ // abs(x) < small_threshold:<br class="">+ // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))<br class="">+ // sinh(x) is then sign(x)*z.<br class="">+<br class="">+ const double max_sinh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e<br class="">+<br class="">+ // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)<br class="">+ const double small_threshold = 0x1.2b708872320e2p+4;<br class="">+<br class="">+ double y = fabs(x);<br class="">+<br class="">+ // In this range we find the integer part y0 of y<br class="">+ // and the increment dy = y - y0. 
We then compute<br class="">+ // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)<br class="">+ // where sinh(y0) and cosh(y0) are obtained from tables<br class="">+<br class="">+ int ind = min((int)y, 36);<br class="">+ double dy = y - ind;<br class="">+ double dy2 = dy * dy;<br class="">+<br class="">+ double sdy = dy * dy2 *<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.250521176994133472333666e-7),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> 
</span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.275573191913636406057211e-5),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.198412698413242405162014e-3),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.833333333333329931873097e-2),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.166666666666666667013899e0);<br class="">+<br class="">+ double cdy = dy2 * fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> fma(dy2, 
0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.275573350756016588011357e-6),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.248015872460622433115785e-4),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.138888888889814854814536e-2),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.416666666666660876512776e-1),<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre"> </span> 0.500000000000000005911074e0);<br class="">+<br class="">+ // At this point sinh(dy) is approximated by dy + sdy.<br class="">+ // Shift some significant bits from dy to sdy.<br class="">+ double sdy1 = as_double(as_ulong(dy) & 0xfffffffff8000000UL);<br class="">+ double sdy2 = sdy + (dy - sdy1);<br class="">+<br class="">+ double2 tv = USE_TABLE(cosh_tbl, ind);<br class="">+ double cl = tv.s0;<br class="">+ double ct = tv.s1;<br class="">+ tv = USE_TABLE(sinh_tbl, ind);<br class="">+ double sl = tv.s0;<br class="">+ double st = tv.s1;<br class="">+<br class="">+ double 
z = fma(cl, sdy1, fma(sl, cdy, fma(cl, sdy2, fma(ct, sdy1, fma(st, cdy, ct*sdy2)) + st))) + sl;<br class="">+<br class="">+ // Other cases<br class="">+ z = (y < 0x1.0p-28) | isnan(x) | isinf(x) ? y : z;<br class="">+<br class="">+ double t = exp(y - 0x1.62e42fefa3800p-1);<br class="">+ t = fma(t, -0x1.ef35793c76641p-45, t);<br class="">+ z = y >= small_threshold ? t : z;<br class="">+ z = y >= max_sinh_arg ? as_double(PINFBITPATT_DP64) : z;<br class="">+<br class="">+ return copysign(z, x);<br class="">+}<br class="">+<br class="">+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)<br class="">+<br class="">+#endif<br class="">-- <br class="">2.9.3<br class=""><br class="">_______________________________________________<br class="">Libclc-dev mailing list<br class=""><a href="mailto:Libclc-dev@lists.llvm.org" class="">Libclc-dev@lists.llvm.org</a><br class=""><a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev" class="">http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev</a><br class=""></div></div></blockquote><br class=""></div><div>Passes conformance for me, LGTM</div><br class=""></body></html>