<html><head><meta http-equiv="Content-Type" content="text/html; charset=us-ascii"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class=""><br class=""><div><blockquote type="cite" class=""><div class="">On Feb 24, 2017, at 12:27, Jan Vesely via Libclc-dev &lt;<a href="mailto:libclc-dev@lists.llvm.org" class="">libclc-dev@lists.llvm.org</a>&gt; wrote:</div><br class="Apple-interchange-newline"><div class=""><div class="">mostly copied from amd_builtins<br class="">---<br class="">passes sinh piglit on carrizo and topaz<br class=""><br class=""> generic/include/clc/clc.h         |   1 +<br class=""> generic/include/clc/math/sinh.h   |  24 +++++<br class=""> generic/include/clc/math/sinh.inc |  23 +++++<br class=""> generic/lib/SOURCES               |   1 +<br class=""> generic/lib/math/sinh.cl          | 191 ++++++++++++++++++++++++++++++++++++++<br class=""> 5 files changed, 240 insertions(+)<br class=""> create mode 100644 generic/include/clc/math/sinh.h<br class=""> create mode 100644 generic/include/clc/math/sinh.inc<br class=""> create mode 100644 generic/lib/math/sinh.cl<br class=""><br class="">diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h<br class="">index da526b2..bf4b01d 100644<br class="">--- a/generic/include/clc/clc.h<br class="">+++ b/generic/include/clc/clc.h<br class="">@@ -86,6 +86,7 @@<br class=""> #include &lt;clc/math/round.h&gt;<br class=""> #include &lt;clc/math/sin.h&gt;<br class=""> #include &lt;clc/math/sincos.h&gt;<br class="">+#include &lt;clc/math/sinh.h&gt;<br class=""> #include &lt;clc/math/sinpi.h&gt;<br class=""> #include &lt;clc/math/sqrt.h&gt;<br class=""> #include &lt;clc/math/tan.h&gt;<br class="">diff --git a/generic/include/clc/math/sinh.h b/generic/include/clc/math/sinh.h<br class="">new file mode 100644<br class="">index 0000000..a9087dc<br class="">--- /dev/null<br class="">+++ b/generic/include/clc/math/sinh.h<br class="">@@ -0,0 +1,24 @@<br class="">+/*<br class="">+ * Copyright (c) 2014, 
2015 Advanced Micro Devices, Inc.<br class="">+ *<br class="">+ * Permission is hereby granted, free of charge, to any person obtaining a copy<br class="">+ * of this software and associated documentation files (the "Software"), to deal<br class="">+ * in the Software without restriction, including without limitation the rights<br class="">+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br class="">+ * copies of the Software, and to permit persons to whom the Software is<br class="">+ * furnished to do so, subject to the following conditions:<br class="">+ *<br class="">+ * The above copyright notice and this permission notice shall be included in<br class="">+ * all copies or substantial portions of the Software.<br class="">+ *<br class="">+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br class="">+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br class="">+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE<br class="">+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br class="">+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br class="">+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br class="">+ * THE SOFTWARE.<br class="">+ */<br class="">+<br class="">+#define __CLC_BODY &lt;clc/math/sinh.inc&gt;<br class="">+#include &lt;clc/math/gentype.inc&gt;<br class="">diff --git a/generic/include/clc/math/sinh.inc b/generic/include/clc/math/sinh.inc<br class="">new file mode 100644<br class="">index 0000000..88bb255<br class="">--- /dev/null<br class="">+++ b/generic/include/clc/math/sinh.inc<br class="">@@ -0,0 +1,23 @@<br class="">+/*<br class="">+ * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.<br class="">+ *<br class="">+ * Permission is hereby granted, free of charge, to any person obtaining a copy<br class="">+ * of this software and associated documentation files (the "Software"), to deal<br class="">+ * in the Software without restriction, including without limitation the rights<br class="">+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br class="">+ * copies of the Software, and to permit persons to whom the Software is<br class="">+ * furnished to do so, subject to the following conditions:<br class="">+ *<br class="">+ * The above copyright notice and this permission notice shall be included in<br class="">+ * all copies or substantial portions of the Software.<br class="">+ *<br class="">+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br class="">+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br class="">+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE<br class="">+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br class="">+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br class="">+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br class="">+ * THE SOFTWARE.<br class="">+ */<br class="">+<br class="">+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sinh(__CLC_GENTYPE x);<br class="">diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES<br class="">index 517daba..9e0157b 100644<br class="">--- a/generic/lib/SOURCES<br class="">+++ b/generic/lib/SOURCES<br class="">@@ -116,6 +116,7 @@ math/pown.cl<br class=""> math/sin.cl<br class=""> math/sincos.cl<br class=""> math/sincos_helpers.cl<br class="">+math/sinh.cl<br class=""> math/sinpi.cl<br class=""> math/clc_sqrt.cl<br class=""> math/sqrt.cl<br class="">diff --git a/generic/lib/math/sinh.cl b/generic/lib/math/sinh.cl<br class="">new file mode 100644<br class="">index 0000000..9159b89<br class="">--- /dev/null<br class="">+++ b/generic/lib/math/sinh.cl<br class="">@@ -0,0 +1,191 @@<br class="">+/*<br class="">+ * Copyright (c) 2014 Advanced Micro Devices, Inc.<br class="">+ *<br class="">+ * Permission is hereby granted, free of charge, to any person obtaining a copy<br class="">+ * of this software and associated documentation files (the "Software"), to deal<br class="">+ * in the Software without restriction, including without limitation the rights<br class="">+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell<br class="">+ * copies of the Software, and to permit persons to whom the Software is<br class="">+ * furnished to do so, subject to the following conditions:<br class="">+ *<br class="">+ * The above copyright notice and this permission notice shall be included in<br class="">+ * all copies or substantial portions of the Software.<br class="">+ *<br class="">+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
EXPRESS OR<br class="">+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br class="">+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE<br class="">+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br class="">+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br class="">+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN<br class="">+ * THE SOFTWARE.<br class="">+ */<br class="">+<br class="">+#include &lt;clc/clc.h&gt;<br class="">+<br class="">+#include "math.h"<br class="">+#include "tables.h"<br class="">+#include "../clcmacro.h"<br class="">+<br class="">+_CLC_OVERLOAD _CLC_DEF float sinh(float x)<br class="">+{<br class="">+    // After dealing with special cases the computation is split into regions as follows.<br class="">+    // abs(x) >= max_sinh_arg:<br class="">+    // sinh(x) = sign(x)*Inf<br class="">+    // abs(x) >= small_threshold:<br class="">+    // sinh(x) = sign(x)*exp(abs(x))/2 computed using the splitexp and scaleDouble functions as for exp_amd().<br class="">+    // abs(x) < small_threshold:<br class="">+    // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))<br class="">+    // sinh(x) is then sign(x)*z.<br class="">+<br class="">+    const float max_sinh_arg = 0x1.65a9fap+6f;<br class="">+    const float small_threshold = 0x1.0a2b24p+3f;<br class="">+<br class="">+    uint ux = as_uint(x);<br class="">+    uint aux = ux & EXSIGNBIT_SP32;<br class="">+    uint xs = ux ^ aux;<br class="">+    float y = as_float(aux);<br class="">+<br class="">+    // We find the integer part y0 of y and the increment dy = y - y0. We then compute<br class="">+    // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)<br class="">+    // where sinh(y0) and cosh(y0) are tabulated above.<br class="">+    int ind = (int) y;<br class="">+    ind = (uint)ind > 36U ? 
0 : ind;<br class="">+<br class="">+    float dy = y - ind;<br class="">+    float dy2 = dy * dy;<br class="">+<br class="">+    float sdy = mad(dy2,<br class="">+                    mad(dy2,<br class="">+                        mad(dy2,<br class="">+                            mad(dy2,<br class="">+                                mad(dy2,<br class="">+                                    mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f),<br class="">+                                    0.250521176994133472333666e-7f),<br class="">+                                0.275573191913636406057211e-5f),<br class="">+                            0.198412698413242405162014e-3f),<br class="">+                         0.833333333333329931873097e-2f),<br class="">+                    0.166666666666666667013899e0f);<br class="">+    sdy = mad(sdy, dy*dy2, dy);<br class="">+<br class="">+    float cdy = mad(dy2,<br class="">+                    mad(dy2,<br class="">+                        mad(dy2,<br class="">+                            mad(dy2,<br class="">+                                mad(dy2,<br class="">+                                    mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f),<br class="">+                                    0.275573350756016588011357e-6f),<br class="">+                                0.248015872460622433115785e-4f),<br class="">+                            0.138888888889814854814536e-2f),<br class="">+                        0.416666666666660876512776e-1f),<br class="">+                    0.500000000000000005911074e0f);<br class="">+    cdy = mad(cdy, dy2, 1.0f);<br class="">+<br class="">+    float2 tv = USE_TABLE(sinhcosh_tbl, ind);<br class="">+    float z = mad(tv.s1, sdy, tv.s0 * cdy);<br class="">+    z = as_float(xs | as_uint(z));<br class="">+<br class="">+    // When y is large enough so that the negative exponential is negligible,<br class="">+    // so sinh(y) is approximated by 
sign(x)*exp(y)/2.<br class="">+    float t = exp(y - 0x1.62e500p-1f);<br class="">+    float zsmall = mad(0x1.a0210ep-18f, t, t);<br class="">+    zsmall = as_float(xs | as_uint(zsmall));<br class="">+    z = y >= small_threshold ? zsmall : z;<br class="">+<br class="">+    // Corner cases<br class="">+    float zinf = as_float(PINFBITPATT_SP32 | xs);<br class="">+    z = y >= max_sinh_arg ? zinf : z;<br class="">+    z = aux > PINFBITPATT_SP32 | aux < 0x38800000U ? x : z;<br class="">+<br class="">+    return z;<br class="">+}<br class="">+<br class="">+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinh, float);<br class="">+<br class="">+#ifdef cl_khr_fp64<br class="">+#pragma OPENCL EXTENSION cl_khr_fp64 : enable<br class="">+<br class="">+_CLC_OVERLOAD _CLC_DEF double sinh(double x)<br class="">+{<br class="">+    // After dealing with special cases the computation is split into<br class="">+    // regions as follows:<br class="">+    //<br class="">+    // abs(x) >= max_sinh_arg:<br class="">+    // sinh(x) = sign(x)*Inf<br class="">+    //<br class="">+    // abs(x) >= small_threshold:<br class="">+    // sinh(x) = sign(x)*exp(abs(x))/2 computed using the<br class="">+    // splitexp and scaleDouble functions as for exp_amd().<br class="">+    //<br class="">+    // abs(x) < small_threshold:<br class="">+    // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0)))<br class="">+    // sinh(x) is then sign(x)*z.<br class="">+<br class="">+    const double max_sinh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e<br class="">+<br class="">+    // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27)<br class="">+    const double small_threshold = 0x1.2b708872320e2p+4;<br class="">+<br class="">+    double y = fabs(x);<br class="">+<br class="">+    // In this range we find the integer part y0 of y<br class="">+    // and the increment dy = y - y0. 
We then compute<br class="">+    // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy)<br class="">+    // where sinh(y0) and cosh(y0) are obtained from tables<br class="">+<br class="">+    int ind = min((int)y, 36);<br class="">+    double dy = y - ind;<br class="">+    double dy2 = dy * dy;<br class="">+<br class="">+    double sdy = dy * dy2 *<br class="">+<span class="Apple-tab-span" style="white-space:pre">  </span>         fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span>     fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>     fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span> fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre">     </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>     fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9),<br class="">+<span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>     0.250521176994133472333666e-7),<br class="">+<span 
class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span> 0.275573191913636406057211e-5),<br class="">+<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>     0.198412698413242405162014e-3),<br class="">+<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span> 0.833333333333329931873097e-2),<br class="">+<span class="Apple-tab-span" style="white-space:pre">      </span><span class="Apple-tab-span" style="white-space:pre">    </span>     0.166666666666666667013899e0);<br class="">+<br class="">+    double cdy = dy2 * fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre">     </span>                   fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>       fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>   fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>       fma(dy2,<br class="">+<span class="Apple-tab-span" style="white-space:pre"> </span><span class="Apple-tab-span" 
style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>   fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8),<br class="">+<span class="Apple-tab-span" style="white-space:pre">        </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>   0.275573350756016588011357e-6),<br class="">+<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>       0.248015872460622433115785e-4),<br class="">+<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>   0.138888888889814854814536e-2),<br class="">+<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>       0.416666666666660876512776e-1),<br class="">+<span class="Apple-tab-span" style="white-space:pre">  </span><span class="Apple-tab-span" style="white-space:pre">    </span><span class="Apple-tab-span" style="white-space:pre">    </span>   0.500000000000000005911074e0);<br class="">+<br class="">+    // At this point sinh(dy) is approximated by dy + sdy.<br class="">+    // Shift some significant bits from dy to sdy.<br class="">+    double sdy1 = as_double(as_ulong(dy) & 0xfffffffff8000000UL);<br 
class="">+    double sdy2 = sdy + (dy - sdy1);<br class="">+<br class="">+    double2 tv = USE_TABLE(cosh_tbl, ind);<br class="">+    double cl = tv.s0;<br class="">+    double ct = tv.s1;<br class="">+    tv = USE_TABLE(sinh_tbl, ind);<br class="">+    double sl = tv.s0;<br class="">+    double st = tv.s1;<br class="">+<br class="">+    double z = fma(cl, sdy1, fma(sl, cdy, fma(cl, sdy2, fma(ct, sdy1, fma(st, cdy, ct*sdy2)) + st))) + sl;<br class="">+<br class="">+    // Other cases<br class="">+    z = (y < 0x1.0p-28) | isnan(x) | isinf(x) ? y : z;<br class="">+<br class="">+    double t = exp(y - 0x1.62e42fefa3800p-1);<br class="">+    t = fma(t, -0x1.ef35793c76641p-45, t);<br class="">+    z = y >= small_threshold ? t : z;<br class="">+    z = y >= max_sinh_arg ? as_double(PINFBITPATT_DP64) : z;<br class="">+<br class="">+    return copysign(z, x);<br class="">+}<br class="">+<br class="">+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double)<br class="">+<br class="">+#endif<br class="">-- <br class="">2.9.3<br class=""><br class="">_______________________________________________<br class="">Libclc-dev mailing list<br class=""><a href="mailto:Libclc-dev@lists.llvm.org" class="">Libclc-dev@lists.llvm.org</a><br class=""><a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev" class="">http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev</a><br class=""></div></div></blockquote><br class=""></div><div>Passes conformance for me, LGTM</div><br class=""></body></html>