[Libclc-dev] [PATCH 15/15] Implement cbrt builtin

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Mon Feb 1 20:48:56 PST 2016


Hi Tom,

Did this patch fall through the cracks?  I was looking through my local
repository of random patches, and while going through this series, I
noticed that cbrt was still not present in libclc.

--Aaron

On Tue, Apr 7, 2015 at 1:05 PM, Tom Stellard <thomas.stellard at amd.com>
wrote:

> This implementation was ported from the AMD builtin library
> and has been tested with piglit, OpenCV, and the ocl conformance tests.
> ---
>  generic/include/clc/clc.h         |   1 +
>  generic/include/clc/math/cbrt.h   |  24 ++
>  generic/include/clc/math/cbrt.inc |  23 ++
>  generic/lib/SOURCES               |   1 +
>  generic/lib/math/cbrt.cl          | 151 +++++++++
>  generic/lib/math/tables.cl        | 665 ++++++++++++++++++++++++++++++++++++++
>  generic/lib/math/tables.h         |   4 +
>  7 files changed, 869 insertions(+)
>  create mode 100644 generic/include/clc/math/cbrt.h
>  create mode 100644 generic/include/clc/math/cbrt.inc
>  create mode 100644 generic/lib/math/cbrt.cl
>
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 61e45c3..3cb68bd 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -43,6 +43,7 @@
>  #include <clc/math/atan2pi.h>
>  #include <clc/math/atanh.h>
>  #include <clc/math/atanpi.h>
> +#include <clc/math/cbrt.h>
>  #include <clc/math/copysign.h>
>  #include <clc/math/cos.h>
>  #include <clc/math/cosh.h>
> diff --git a/generic/include/clc/math/cbrt.h b/generic/include/clc/math/cbrt.h
> new file mode 100644
> index 0000000..15d23e7
> --- /dev/null
> +++ b/generic/include/clc/math/cbrt.h
> @@ -0,0 +1,24 @@
> +/*
> + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
> a copy
> + * of this software and associated documentation files (the "Software"),
> to deal
> + * in the Software without restriction, including without limitation the
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or
> sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN
> + * THE SOFTWARE.
> + */
> +
> +#define __CLC_BODY <clc/math/cbrt.inc>
> +#include <clc/math/gentype.inc>
> diff --git a/generic/include/clc/math/cbrt.inc b/generic/include/clc/math/cbrt.inc
> new file mode 100644
> index 0000000..29e2ea4
> --- /dev/null
> +++ b/generic/include/clc/math/cbrt.inc
> @@ -0,0 +1,23 @@
> +/*
> + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
> a copy
> + * of this software and associated documentation files (the "Software"),
> to deal
> + * in the Software without restriction, including without limitation the
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or
> sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN
> + * THE SOFTWARE.
> + */
> +
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cbrt(__CLC_GENTYPE x); // declares the cbrt overload for whatever type __CLC_GENTYPE currently names
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 08f79a0..16a8a5c 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -73,6 +73,7 @@ math/atan2.cl
>  math/atan2pi.cl
>  math/atanh.cl
>  math/atanpi.cl
> +math/cbrt.cl
>  math/copysign.cl
>  math/cos.cl
>  math/cosh.cl
> diff --git a/generic/lib/math/cbrt.cl b/generic/lib/math/cbrt.cl
> new file mode 100644
> index 0000000..5ff9367
> --- /dev/null
> +++ b/generic/lib/math/cbrt.cl
> @@ -0,0 +1,151 @@
> +/*
> + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining
> a copy
> + * of this software and associated documentation files (the "Software"),
> to deal
> + * in the Software without restriction, including without limitation the
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or
> sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> IN
> + * THE SOFTWARE.
> + */
> +
> +#include <clc/clc.h>
> +
> +#include "math.h"
> +#include "tables.h"
> +#include "../clcmacro.h"
> +
> +_CLC_OVERLOAD _CLC_DEF float cbrt(float x) {
> +    // Single-precision cube root of x, built from exponent/mantissa bit manipulation, two table lookups and a cubic polynomial.
> +    uint xi = as_uint(x);  // raw bits of x
> +    uint axi = xi & EXSIGNBIT_SP32;  // assumes EXSIGNBIT_SP32 (math.h, not shown) keeps everything except the sign bit — TODO confirm
> +    uint xsign = axi ^ xi;  // the bits the mask removed; OR-ed back into the result at the end
> +    xi = axi;
> +    // m: exponent field with the bias removed.
> +    int m = (xi >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32;
> +
> +    // Treat subnormals: OR in the bits of 1.0f (0x3f800000) and subtract 1.0f to renormalize the mantissa
> +    uint xisub = as_uint(as_float(xi | 0x3f800000) - 1.0f);
> +    int msub = (xisub >> EXPSHIFTBITS_SP32) - 253;  // 253 = 127 (bias) + 126 (subnormal scale 2^-126)
> +    int c = m == -127;  // true when the exponent field was zero
> +    xi = c ? xisub : xi;
> +    m = c ? msub : m;
> +    // Split m = 3*m3 + rem; integer '/' truncates toward zero, so rem lies in [-2, 2].
> +    int m3 = m / 3;
> +    int rem = m - m3*3;
> +    float mf = as_float((m3 + EXPBIAS_SP32) << EXPSHIFTBITS_SP32);  // 2^m3, applied to the result at the end
> +    // indx: top 7 mantissa bits, bumped by one step when the next bit (0x00008000) is set, i.e. rounded to nearest.
> +    uint indx = (xi & 0x007f0000) + ((xi & 0x00008000) << 1);
> +    float f = as_float((xi & MANTBITS_SP32) | 0x3f000000) - as_float(indx
> | 0x3f000000);  // mantissa placed under the exponent of 0.5f, minus the rounded table point (assumes MANTBITS_SP32 is the mantissa mask)
> +
> +    indx >>= 16;  // table index in 0..128
> +    float r = f * USE_TABLE(log_inv_tbl, indx);  // NOTE(review): log_inv_tbl is defined elsewhere; presumably the reciprocal of the table point — confirm
> +    float poly = mad(mad(r, 0x1.f9add4p-5f, -0x1.c71c72p-4f), r*r, r *
> 0x1.555556p-2f);  // ~ r/3 - r^2/9 + 5*r^3/81, the leading terms of (1+r)^(1/3) - 1
> +
> +    // Head (remH) and tail (remT) of 2^(rem/3); this could also be done with a 5-element table
> +    float remH = 0x1.428000p-1f;  // initial values cover rem == -2
> +    float remT = 0x1.45f31ap-14f;
> +
> +    remH = rem == -1 ? 0x1.964000p-1f : remH;
> +    remT = rem == -1 ? 0x1.fea53ep-13f : remT;
> +
> +    remH = rem ==  0 ? 0x1.000000p+0f : remH;
> +    remT = rem ==  0 ? 0x0.000000p+0f  : remT;
> +
> +    remH = rem ==  1 ? 0x1.428000p+0f : remH;
> +    remT = rem ==  1 ? 0x1.45f31ap-13f : remT;
> +
> +    remH = rem ==  2 ? 0x1.964000p+0f : remH;
> +    remT = rem ==  2 ? 0x1.fea53ep-12f : remT;
> +    // Head/tail pair from cbrt_tbl for the rounded mantissa point.
> +    float2 tv = USE_TABLE(cbrt_tbl, indx);
> +    float cbrtH = tv.s0;
> +    float cbrtT = tv.s1;
> +    // (bH + bT) ~ (cbrtH + cbrtT) * (remH + remT), kept as a head and a tail.
> +    float bH = cbrtH * remH;
> +    float bT = mad(cbrtH, remT, mad(cbrtT, remH, cbrtT*remT));
> +    // z = (bH + bT) * (1 + poly), expanded term by term.
> +    float z = mad(poly, bH, mad(poly, bT, bT)) + bH;
> +    z *= mf;  // apply the 2^m3 scale
> +    z = as_float(as_uint(z) | xsign);  // put the saved sign bits back
> +    c = axi >= EXPBITS_SP32 | axi == 0;  // parses as (axi >= EXPBITS_SP32) | (axi == 0); NOTE(review): presumably Inf/NaN or zero — confirm EXPBITS_SP32 in math.h
> +    z = c ? x : z;  // those inputs are returned unchanged
> +    return z;
> +
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cbrt, float);  // NOTE(review): presumably emits the float vector overloads; macro lives in ../clcmacro.h (not shown)
> +
> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +
> +_CLC_OVERLOAD _CLC_DEF double cbrt(double x) {
> +    // Double-precision cube root of x: exponent split by 3, table lookup on the rounded mantissa, degree-6 polynomial correction.
> +    int return_x = isinf(x) | isnan(x) | x == 0.0;  // Inf, NaN and zero are returned unchanged at the end
> +    ulong ux = as_ulong(fabs(x));  // bits of |x|
> +    int m = (as_int2(ux).hi >> 20) - 1023;  // exponent field (high word >> 20) with the bias 1023 removed
> +
> +    // Treat subnormals: OR in the bits of 1.0 and subtract 1.0 to renormalize the mantissa
> +    ulong uxs = as_ulong(as_double(0x3ff0000000000000UL | ux) - 1.0);
> +    int ms = m + (as_int2(uxs).hi >> 20) - 1022;  // only used when m == -1023, giving (exponent field of uxs) - 2045
> +
> +    int c = m == -1023;  // true when the exponent field was zero
> +    ux = c ? uxs : ux;
> +    m = c ? ms : m;
> +    // Split m = 3*mby3 + rem; integer '/' truncates toward zero, so rem lies in [-2, 2].
> +    int mby3 = m / 3;
> +    int rem = m - 3*mby3;
> +
> +    double mf = as_double((ulong)(mby3 + 1023) << 52);  // 2^mby3, applied to the result at the end
> +
> +    ux &= 0x000fffffffffffffUL;  // keep only the 52 mantissa bits
> +    double Y = as_double(0x3fe0000000000000UL | ux);  // mantissa under the exponent of 0.5, so Y is in [0.5, 1)
> +
> +    // index: mantissa rounded to the nearest multiple of 2^-9, as an integer in [256, 512]
> +    int index = as_int2(ux).hi >> 11;
> +    index = (0x100 | (index >> 1)) + (index & 1);
> +    double F = (double)index * 0x1.0p-9;  // the table point nearest to Y
> +
> +    double f = Y - F;
> +    double r = f * USE_TABLE(cbrt_inv_tbl, index-256);  // table entries look like 1/F, making r ~ (Y - F)/F
> +    // z: polynomial in r with no constant term, in Horner form; its first two terms are r/3 - r^2/9.
> +    double z = r * fma(r,
> +                       fma(r,
> +                           fma(r,
> +                               fma(r,
> +                                   fma(r, -0x1.8090d6221a247p-6,
> 0x1.ee7113506ac13p-6),
> +                                   -0x1.511e8d2b3183bp-5),
> +                               0x1.f9add3c0ca458p-5),
> +                           -0x1.c71c71c71c71cp-4),
> +                       0x1.5555555555555p-2);
> +    // Head/tail pair selected by rem (rem+2 is 0..4); cbrt_rem_tbl is defined outside this view.
> +    double2 tv = USE_TABLE(cbrt_rem_tbl, rem+2);
> +    double Rem_h = tv.s0;
> +    double Rem_t = tv.s1;
> +    // Head/tail pair for the table point F.
> +    tv = USE_TABLE(cbrt_dbl_tbl, index-256);
> +    double F_h = tv.s0;
> +    double F_t = tv.s1;
> +    // (b_h + b_t) ~ (F_h + F_t) * (Rem_h + Rem_t), kept as a head and a tail.
> +    double b_h = F_h * Rem_h;
> +    double b_t = fma(Rem_t, F_h, fma(F_t, Rem_h, F_t*Rem_t));
> +    // ans = (b_h + b_t) * (1 + z), expanded term by term.
> +    double ans = fma(z, b_h, fma(z, b_t, b_t)) + b_h;
> +    ans = copysign(ans*mf, x);  // apply the 2^mby3 scale and give the result the sign of x
> +    return return_x ? x : ans;
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cbrt, double)  // NOTE(review): presumably emits the double vector overloads; macro lives in ../clcmacro.h (not shown)
> +
> +#endif
> diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
> index e21c66b..afb63c0 100644
> --- a/generic/lib/math/tables.cl
> +++ b/generic/lib/math/tables.cl
> @@ -345,6 +345,138 @@ DECLARE_TABLE(float2, SINHCOSH_TBL, 37) = {
>      (float2)(0x1.ea215ap+50f, 0x1.ea215ap+50f)
>  };
>
> +DECLARE_TABLE(float2, CBRT_TBL, 129) = {  // (head, tail) pairs read by cbrt(float) as cbrtH/cbrtT; entry i looks like cbrt(1 + i/128): first is (1, 0), last matches the rem == 1 constants in cbrt.cl
> +    (float2)(0x1.000000p+0f, 0x0.000000p+0f),
> +    (float2)(0x1.008000p+0f, 0x1.51cb0ap-11f),
> +    (float2)(0x1.014000p+0f, 0x1.39221ep-12f),
> +    (float2)(0x1.01c000p+0f, 0x1.e06908p-11f),
> +    (float2)(0x1.028000p+0f, 0x1.1d6978p-11f),
> +    (float2)(0x1.034000p+0f, 0x1.4ea1bep-13f),
> +    (float2)(0x1.03c000p+0f, 0x1.833b8ep-11f),
> +    (float2)(0x1.048000p+0f, 0x1.587002p-12f),
> +    (float2)(0x1.050000p+0f, 0x1.ceb290p-11f),
> +    (float2)(0x1.05c000p+0f, 0x1.d57f34p-12f),
> +    (float2)(0x1.068000p+0f, 0x1.cc53acp-21f),
> +    (float2)(0x1.070000p+0f, 0x1.0fe098p-11f),
> +    (float2)(0x1.07c000p+0f, 0x1.91b586p-15f),
> +    (float2)(0x1.084000p+0f, 0x1.1c362ep-11f),
> +    (float2)(0x1.090000p+0f, 0x1.94398ep-15f),
> +    (float2)(0x1.098000p+0f, 0x1.1055bcp-11f),
> +    (float2)(0x1.0a4000p+0f, 0x1.7e63cap-19f),
> +    (float2)(0x1.0ac000p+0f, 0x1.d99e1ap-12f),
> +    (float2)(0x1.0b4000p+0f, 0x1.d258dep-11f),
> +    (float2)(0x1.0c0000p+0f, 0x1.645962p-12f),
> +    (float2)(0x1.0c8000p+0f, 0x1.8c5b0ep-11f),
> +    (float2)(0x1.0d4000p+0f, 0x1.83d0c8p-13f),
> +    (float2)(0x1.0dc000p+0f, 0x1.300812p-11f),
> +    (float2)(0x1.0e4000p+0f, 0x1.f9a65ap-11f),
> +    (float2)(0x1.0f0000p+0f, 0x1.7bbcd8p-12f),
> +    (float2)(0x1.0f8000p+0f, 0x1.7cbf68p-11f),
> +    (float2)(0x1.104000p+0f, 0x1.b2c166p-14f),
> +    (float2)(0x1.10c000p+0f, 0x1.d56ea4p-12f),
> +    (float2)(0x1.114000p+0f, 0x1.99eb32p-11f),
> +    (float2)(0x1.120000p+0f, 0x1.1007a2p-13f),
> +    (float2)(0x1.128000p+0f, 0x1.d212aap-12f),
> +    (float2)(0x1.130000p+0f, 0x1.890f18p-11f),
> +    (float2)(0x1.13c000p+0f, 0x1.2104e2p-14f),
> +    (float2)(0x1.144000p+0f, 0x1.74961ep-12f),
> +    (float2)(0x1.14c000p+0f, 0x1.4b9b66p-11f),
> +    (float2)(0x1.154000p+0f, 0x1.d81e66p-11f),
> +    (float2)(0x1.160000p+0f, 0x1.7f825cp-13f),
> +    (float2)(0x1.168000p+0f, 0x1.c5dca2p-12f),
> +    (float2)(0x1.170000p+0f, 0x1.6153bap-11f),
> +    (float2)(0x1.178000p+0f, 0x1.db1cc2p-11f),
> +    (float2)(0x1.184000p+0f, 0x1.4154b0p-13f),
> +    (float2)(0x1.18c000p+0f, 0x1.821114p-12f),
> +    (float2)(0x1.194000p+0f, 0x1.2d4240p-11f),
> +    (float2)(0x1.19c000p+0f, 0x1.950d82p-11f),
> +    (float2)(0x1.1a4000p+0f, 0x1.f8755cp-11f),
> +    (float2)(0x1.1b0000p+0f, 0x1.5e12a4p-13f),
> +    (float2)(0x1.1b8000p+0f, 0x1.648c38p-12f),
> +    (float2)(0x1.1c0000p+0f, 0x1.08c43ep-11f),
> +    (float2)(0x1.1c8000p+0f, 0x1.5b0970p-11f),
> +    (float2)(0x1.1d0000p+0f, 0x1.a91fe8p-11f),
> +    (float2)(0x1.1d8000p+0f, 0x1.f311b6p-11f),
> +    (float2)(0x1.1e4000p+0f, 0x1.c74618p-14f),
> +    (float2)(0x1.1ec000p+0f, 0x1.eabb54p-13f),
> +    (float2)(0x1.1f4000p+0f, 0x1.70db14p-12f),
> +    (float2)(0x1.1fc000p+0f, 0x1.e45cbcp-12f),
> +    (float2)(0x1.204000p+0f, 0x1.27faa6p-11f),
> +    (float2)(0x1.20c000p+0f, 0x1.59db98p-11f),
> +    (float2)(0x1.214000p+0f, 0x1.87da46p-11f),
> +    (float2)(0x1.21c000p+0f, 0x1.b1ffa0p-11f),
> +    (float2)(0x1.224000p+0f, 0x1.d85478p-11f),
> +    (float2)(0x1.22c000p+0f, 0x1.fae17ep-11f),
> +    (float2)(0x1.238000p+0f, 0x1.9af40cp-15f),
> +    (float2)(0x1.240000p+0f, 0x1.a6319ep-14f),
> +    (float2)(0x1.248000p+0f, 0x1.30baa6p-13f),
> +    (float2)(0x1.250000p+0f, 0x1.7fc362p-13f),
> +    (float2)(0x1.258000p+0f, 0x1.c05362p-13f),
> +    (float2)(0x1.260000p+0f, 0x1.f28a98p-13f),
> +    (float2)(0x1.268000p+0f, 0x1.0b4442p-12f),
> +    (float2)(0x1.270000p+0f, 0x1.16361ap-12f),
> +    (float2)(0x1.278000p+0f, 0x1.1a2a2ap-12f),
> +    (float2)(0x1.280000p+0f, 0x1.172f8ep-12f),
> +    (float2)(0x1.288000p+0f, 0x1.0d5530p-12f),
> +    (float2)(0x1.290000p+0f, 0x1.f9538ep-13f),
> +    (float2)(0x1.298000p+0f, 0x1.ca77b0p-13f),
> +    (float2)(0x1.2a0000p+0f, 0x1.8e336ap-13f),
> +    (float2)(0x1.2a8000p+0f, 0x1.44a304p-13f),
> +    (float2)(0x1.2b0000p+0f, 0x1.dbc4c8p-14f),
> +    (float2)(0x1.2b8000p+0f, 0x1.141a2ap-14f),
> +    (float2)(0x1.2c0000p+0f, 0x1.93e44cp-17f),
> +    (float2)(0x1.2c4000p+0f, 0x1.e6e432p-11f),
> +    (float2)(0x1.2cc000p+0f, 0x1.c447c6p-11f),
> +    (float2)(0x1.2d4000p+0f, 0x1.9e80d8p-11f),
> +    (float2)(0x1.2dc000p+0f, 0x1.7595dcp-11f),
> +    (float2)(0x1.2e4000p+0f, 0x1.498d30p-11f),
> +    (float2)(0x1.2ec000p+0f, 0x1.1a6d1ep-11f),
> +    (float2)(0x1.2f4000p+0f, 0x1.d077bap-12f),
> +    (float2)(0x1.2fc000p+0f, 0x1.65ff1ep-12f),
> +    (float2)(0x1.304000p+0f, 0x1.eaf912p-13f),
> +    (float2)(0x1.30c000p+0f, 0x1.fbefb8p-14f),
> +    (float2)(0x1.314000p+0f, 0x1.44905ap-19f),
> +    (float2)(0x1.318000p+0f, 0x1.c017e6p-11f),
> +    (float2)(0x1.320000p+0f, 0x1.7bfdbep-11f),
> +    (float2)(0x1.328000p+0f, 0x1.34fbc6p-11f),
> +    (float2)(0x1.330000p+0f, 0x1.d62f48p-12f),
> +    (float2)(0x1.338000p+0f, 0x1.3cadc6p-12f),
> +    (float2)(0x1.340000p+0f, 0x1.3afc06p-13f),
> +    (float2)(0x1.344000p+0f, 0x1.fc556ep-11f),
> +    (float2)(0x1.34c000p+0f, 0x1.a71f84p-11f),
> +    (float2)(0x1.354000p+0f, 0x1.4f2290p-11f),
> +    (float2)(0x1.35c000p+0f, 0x1.e8c79cp-12f),
> +    (float2)(0x1.364000p+0f, 0x1.2dd0d8p-12f),
> +    (float2)(0x1.36c000p+0f, 0x1.b5ac2ep-14f),
> +    (float2)(0x1.370000p+0f, 0x1.d3d02ap-11f),
> +    (float2)(0x1.378000p+0f, 0x1.6e3d58p-11f),
> +    (float2)(0x1.380000p+0f, 0x1.060200p-11f),
> +    (float2)(0x1.388000p+0f, 0x1.364608p-12f),
> +    (float2)(0x1.390000p+0f, 0x1.6d29b6p-14f),
> +    (float2)(0x1.394000p+0f, 0x1.bd8d5ep-11f),
> +    (float2)(0x1.39c000p+0f, 0x1.4ae030p-11f),
> +    (float2)(0x1.3a4000p+0f, 0x1.ab44b2p-12f),
> +    (float2)(0x1.3ac000p+0f, 0x1.7761cep-13f),
> +    (float2)(0x1.3b0000p+0f, 0x1.e38710p-11f),
> +    (float2)(0x1.3b8000p+0f, 0x1.66b2b0p-11f),
> +    (float2)(0x1.3c0000p+0f, 0x1.cebf96p-12f),
> +    (float2)(0x1.3c8000p+0f, 0x1.964b20p-13f),
> +    (float2)(0x1.3cc000p+0f, 0x1.e15004p-11f),
> +    (float2)(0x1.3d4000p+0f, 0x1.5a9bcep-11f),
> +    (float2)(0x1.3dc000p+0f, 0x1.a2f4d8p-12f),
> +    (float2)(0x1.3e4000p+0f, 0x1.17c056p-13f),
> +    (float2)(0x1.3e8000p+0f, 0x1.b800f8p-11f),
> +    (float2)(0x1.3f0000p+0f, 0x1.27b132p-11f),
> +    (float2)(0x1.3f8000p+0f, 0x1.2a09b8p-12f),
> +    (float2)(0x1.400000p+0f, 0x0.000000p+0f),
> +    (float2)(0x1.404000p+0f, 0x1.68a69cp-11f),
> +    (float2)(0x1.40c000p+0f, 0x1.9df950p-12f),
> +    (float2)(0x1.414000p+0f, 0x1.983050p-14f),
> +    (float2)(0x1.418000p+0f, 0x1.94c6a4p-11f),
> +    (float2)(0x1.420000p+0f, 0x1.e88494p-12f),
> +    (float2)(0x1.428000p+0f, 0x1.45f31ap-13f)
> +};
> +
>  TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl);
>  TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl);
>
> @@ -353,6 +485,7 @@ uint4 TABLE_MANGLE(pibits_tbl)(size_t idx) {
>  }
>
>  TABLE_FUNCTION(float2, SINHCOSH_TBL, sinhcosh_tbl);
> +TABLE_FUNCTION(float2, CBRT_TBL, cbrt_tbl);
>
>  #ifdef cl_khr_fp64
>
> @@ -826,9 +959,541 @@ DECLARE_TABLE(double2, COSH_TBL, 37) = {
>      (double2)(0x1.ea215a0000000p+50, 0x1.d20d76744835cp+22)
>  };
>
> +DECLARE_TABLE(double, CBRT_INV_TBL, 257) = {  // read by cbrt(double) at index-256; entry i looks like 512/(256 + i), i.e. 1/F for the table point F: first is 2.0, last is 1.0
> +    0x1.0000000000000p+1,
> +    0x1.fe01fe01fe020p+0,
> +    0x1.fc07f01fc07f0p+0,
> +    0x1.fa11caa01fa12p+0,
> +    0x1.f81f81f81f820p+0,
> +    0x1.f6310aca0dbb5p+0,
> +    0x1.f44659e4a4271p+0,
> +    0x1.f25f644230ab5p+0,
> +    0x1.f07c1f07c1f08p+0,
> +    0x1.ee9c7f8458e02p+0,
> +    0x1.ecc07b301ecc0p+0,
> +    0x1.eae807aba01ebp+0,
> +    0x1.e9131abf0b767p+0,
> +    0x1.e741aa59750e4p+0,
> +    0x1.e573ac901e574p+0,
> +    0x1.e3a9179dc1a73p+0,
> +    0x1.e1e1e1e1e1e1ep+0,
> +    0x1.e01e01e01e01ep+0,
> +    0x1.de5d6e3f8868ap+0,
> +    0x1.dca01dca01dcap+0,
> +    0x1.dae6076b981dbp+0,
> +    0x1.d92f2231e7f8ap+0,
> +    0x1.d77b654b82c34p+0,
> +    0x1.d5cac807572b2p+0,
> +    0x1.d41d41d41d41dp+0,
> +    0x1.d272ca3fc5b1ap+0,
> +    0x1.d0cb58f6ec074p+0,
> +    0x1.cf26e5c44bfc6p+0,
> +    0x1.cd85689039b0bp+0,
> +    0x1.cbe6d9601cbe7p+0,
> +    0x1.ca4b3055ee191p+0,
> +    0x1.c8b265afb8a42p+0,
> +    0x1.c71c71c71c71cp+0,
> +    0x1.c5894d10d4986p+0,
> +    0x1.c3f8f01c3f8f0p+0,
> +    0x1.c26b5392ea01cp+0,
> +    0x1.c0e070381c0e0p+0,
> +    0x1.bf583ee868d8bp+0,
> +    0x1.bdd2b899406f7p+0,
> +    0x1.bc4fd65883e7bp+0,
> +    0x1.bacf914c1bad0p+0,
> +    0x1.b951e2b18ff23p+0,
> +    0x1.b7d6c3dda338bp+0,
> +    0x1.b65e2e3beee05p+0,
> +    0x1.b4e81b4e81b4fp+0,
> +    0x1.b37484ad806cep+0,
> +    0x1.b2036406c80d9p+0,
> +    0x1.b094b31d922a4p+0,
> +    0x1.af286bca1af28p+0,
> +    0x1.adbe87f94905ep+0,
> +    0x1.ac5701ac5701bp+0,
> +    0x1.aaf1d2f87ebfdp+0,
> +    0x1.a98ef606a63bep+0,
> +    0x1.a82e65130e159p+0,
> +    0x1.a6d01a6d01a6dp+0,
> +    0x1.a574107688a4ap+0,
> +    0x1.a41a41a41a41ap+0,
> +    0x1.a2c2a87c51ca0p+0,
> +    0x1.a16d3f97a4b02p+0,
> +    0x1.a01a01a01a01ap+0,
> +    0x1.9ec8e951033d9p+0,
> +    0x1.9d79f176b682dp+0,
> +    0x1.9c2d14ee4a102p+0,
> +    0x1.9ae24ea5510dap+0,
> +    0x1.999999999999ap+0,
> +    0x1.9852f0d8ec0ffp+0,
> +    0x1.970e4f80cb872p+0,
> +    0x1.95cbb0be377aep+0,
> +    0x1.948b0fcd6e9e0p+0,
> +    0x1.934c67f9b2ce6p+0,
> +    0x1.920fb49d0e229p+0,
> +    0x1.90d4f120190d5p+0,
> +    0x1.8f9c18f9c18fap+0,
> +    0x1.8e6527af1373fp+0,
> +    0x1.8d3018d3018d3p+0,
> +    0x1.8bfce8062ff3ap+0,
> +    0x1.8acb90f6bf3aap+0,
> +    0x1.899c0f601899cp+0,
> +    0x1.886e5f0abb04ap+0,
> +    0x1.87427bcc092b9p+0,
> +    0x1.8618618618618p+0,
> +    0x1.84f00c2780614p+0,
> +    0x1.83c977ab2beddp+0,
> +    0x1.82a4a0182a4a0p+0,
> +    0x1.8181818181818p+0,
> +    0x1.8060180601806p+0,
> +    0x1.7f405fd017f40p+0,
> +    0x1.7e225515a4f1dp+0,
> +    0x1.7d05f417d05f4p+0,
> +    0x1.7beb3922e017cp+0,
> +    0x1.7ad2208e0ecc3p+0,
> +    0x1.79baa6bb6398bp+0,
> +    0x1.78a4c8178a4c8p+0,
> +    0x1.77908119ac60dp+0,
> +    0x1.767dce434a9b1p+0,
> +    0x1.756cac201756dp+0,
> +    0x1.745d1745d1746p+0,
> +    0x1.734f0c541fe8dp+0,
> +    0x1.724287f46debcp+0,
> +    0x1.713786d9c7c09p+0,
> +    0x1.702e05c0b8170p+0,
> +    0x1.6f26016f26017p+0,
> +    0x1.6e1f76b4337c7p+0,
> +    0x1.6d1a62681c861p+0,
> +    0x1.6c16c16c16c17p+0,
> +    0x1.6b1490aa31a3dp+0,
> +    0x1.6a13cd1537290p+0,
> +    0x1.691473a88d0c0p+0,
> +    0x1.6816816816817p+0,
> +    0x1.6719f3601671ap+0,
> +    0x1.661ec6a5122f9p+0,
> +    0x1.6524f853b4aa3p+0,
> +    0x1.642c8590b2164p+0,
> +    0x1.63356b88ac0dep+0,
> +    0x1.623fa77016240p+0,
> +    0x1.614b36831ae94p+0,
> +    0x1.6058160581606p+0,
> +    0x1.5f66434292dfcp+0,
> +    0x1.5e75bb8d015e7p+0,
> +    0x1.5d867c3ece2a5p+0,
> +    0x1.5c9882b931057p+0,
> +    0x1.5babcc647fa91p+0,
> +    0x1.5ac056b015ac0p+0,
> +    0x1.59d61f123ccaap+0,
> +    0x1.58ed2308158edp+0,
> +    0x1.5805601580560p+0,
> +    0x1.571ed3c506b3ap+0,
> +    0x1.56397ba7c52e2p+0,
> +    0x1.5555555555555p+0,
> +    0x1.54725e6bb82fep+0,
> +    0x1.5390948f40febp+0,
> +    0x1.52aff56a8054bp+0,
> +    0x1.51d07eae2f815p+0,
> +    0x1.50f22e111c4c5p+0,
> +    0x1.5015015015015p+0,
> +    0x1.4f38f62dd4c9bp+0,
> +    0x1.4e5e0a72f0539p+0,
> +    0x1.4d843bedc2c4cp+0,
> +    0x1.4cab88725af6ep+0,
> +    0x1.4bd3edda68fe1p+0,
> +    0x1.4afd6a052bf5bp+0,
> +    0x1.4a27fad76014ap+0,
> +    0x1.49539e3b2d067p+0,
> +    0x1.4880522014880p+0,
> +    0x1.47ae147ae147bp+0,
> +    0x1.46dce34596066p+0,
> +    0x1.460cbc7f5cf9ap+0,
> +    0x1.453d9e2c776cap+0,
> +    0x1.446f86562d9fbp+0,
> +    0x1.43a2730abee4dp+0,
> +    0x1.42d6625d51f87p+0,
> +    0x1.420b5265e5951p+0,
> +    0x1.4141414141414p+0,
> +    0x1.40782d10e6566p+0,
> +    0x1.3fb013fb013fbp+0,
> +    0x1.3ee8f42a5af07p+0,
> +    0x1.3e22cbce4a902p+0,
> +    0x1.3d5d991aa75c6p+0,
> +    0x1.3c995a47babe7p+0,
> +    0x1.3bd60d9232955p+0,
> +    0x1.3b13b13b13b14p+0,
> +    0x1.3a524387ac822p+0,
> +    0x1.3991c2c187f63p+0,
> +    0x1.38d22d366088ep+0,
> +    0x1.3813813813814p+0,
> +    0x1.3755bd1c945eep+0,
> +    0x1.3698df3de0748p+0,
> +    0x1.35dce5f9f2af8p+0,
> +    0x1.3521cfb2b78c1p+0,
> +    0x1.34679ace01346p+0,
> +    0x1.33ae45b57bcb2p+0,
> +    0x1.32f5ced6a1dfap+0,
> +    0x1.323e34a2b10bfp+0,
> +    0x1.3187758e9ebb6p+0,
> +    0x1.30d190130d190p+0,
> +    0x1.301c82ac40260p+0,
> +    0x1.2f684bda12f68p+0,
> +    0x1.2eb4ea1fed14bp+0,
> +    0x1.2e025c04b8097p+0,
> +    0x1.2d50a012d50a0p+0,
> +    0x1.2c9fb4d812ca0p+0,
> +    0x1.2bef98e5a3711p+0,
> +    0x1.2b404ad012b40p+0,
> +    0x1.2a91c92f3c105p+0,
> +    0x1.29e4129e4129ep+0,
> +    0x1.293725bb804a5p+0,
> +    0x1.288b01288b013p+0,
> +    0x1.27dfa38a1ce4dp+0,
> +    0x1.27350b8812735p+0,
> +    0x1.268b37cd60127p+0,
> +    0x1.25e22708092f1p+0,
> +    0x1.2539d7e9177b2p+0,
> +    0x1.2492492492492p+0,
> +    0x1.23eb79717605bp+0,
> +    0x1.23456789abcdfp+0,
> +    0x1.22a0122a0122ap+0,
> +    0x1.21fb78121fb78p+0,
> +    0x1.21579804855e6p+0,
> +    0x1.20b470c67c0d9p+0,
> +    0x1.2012012012012p+0,
> +    0x1.1f7047dc11f70p+0,
> +    0x1.1ecf43c7fb84cp+0,
> +    0x1.1e2ef3b3fb874p+0,
> +    0x1.1d8f5672e4abdp+0,
> +    0x1.1cf06ada2811dp+0,
> +    0x1.1c522fc1ce059p+0,
> +    0x1.1bb4a4046ed29p+0,
> +    0x1.1b17c67f2bae3p+0,
> +    0x1.1a7b9611a7b96p+0,
> +    0x1.19e0119e0119ep+0,
> +    0x1.19453808ca29cp+0,
> +    0x1.18ab083902bdbp+0,
> +    0x1.1811811811812p+0,
> +    0x1.1778a191bd684p+0,
> +    0x1.16e0689427379p+0,
> +    0x1.1648d50fc3201p+0,
> +    0x1.15b1e5f75270dp+0,
> +    0x1.151b9a3fdd5c9p+0,
> +    0x1.1485f0e0acd3bp+0,
> +    0x1.13f0e8d344724p+0,
> +    0x1.135c81135c811p+0,
> +    0x1.12c8b89edc0acp+0,
> +    0x1.12358e75d3033p+0,
> +    0x1.11a3019a74826p+0,
> +    0x1.1111111111111p+0,
> +    0x1.107fbbe011080p+0,
> +    0x1.0fef010fef011p+0,
> +    0x1.0f5edfab325a2p+0,
> +    0x1.0ecf56be69c90p+0,
> +    0x1.0e40655826011p+0,
> +    0x1.0db20a88f4696p+0,
> +    0x1.0d24456359e3ap+0,
> +    0x1.0c9714fbcda3bp+0,
> +    0x1.0c0a7868b4171p+0,
> +    0x1.0b7e6ec259dc8p+0,
> +    0x1.0af2f722eecb5p+0,
> +    0x1.0a6810a6810a7p+0,
> +    0x1.09ddba6af8360p+0,
> +    0x1.0953f39010954p+0,
> +    0x1.08cabb37565e2p+0,
> +    0x1.0842108421084p+0,
> +    0x1.07b9f29b8eae2p+0,
> +    0x1.073260a47f7c6p+0,
> +    0x1.06ab59c7912fbp+0,
> +    0x1.0624dd2f1a9fcp+0,
> +    0x1.059eea0727586p+0,
> +    0x1.05197f7d73404p+0,
> +    0x1.04949cc1664c5p+0,
> +    0x1.0410410410410p+0,
> +    0x1.038c6b78247fcp+0,
> +    0x1.03091b51f5e1ap+0,
> +    0x1.02864fc7729e9p+0,
> +    0x1.0204081020408p+0,
> +    0x1.0182436517a37p+0,
> +    0x1.0101010101010p+0,
> +    0x1.0080402010080p+0,
> +    0x1.0000000000000p+0
> +};
> +
> +DECLARE_TABLE(double2, CBRT_DBL_TBL, 257) = {
> +    (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
> +    (double2)(0x1.0055380000000p+0, 0x1.e6a24c81e4294p-25),
> +    (double2)(0x1.00aa390000000p+0, 0x1.8548511e3a785p-26),
> +    (double2)(0x1.00ff010000000p+0, 0x1.4eb9336ec07f6p-25),
> +    (double2)(0x1.0153920000000p+0, 0x1.0ea64b8b750e1p-27),
> +    (double2)(0x1.01a7eb0000000p+0, 0x1.61637cff8a53cp-27),
> +    (double2)(0x1.01fc0d0000000p+0, 0x1.0733bf7bd1943p-27),
> +    (double2)(0x1.024ff80000000p+0, 0x1.666911345ccedp-26),
> +    (double2)(0x1.02a3ad0000000p+0, 0x1.77b7a3f592f14p-27),
> +    (double2)(0x1.02f72b0000000p+0, 0x1.f18d3dd1a5402p-25),
> +    (double2)(0x1.034a750000000p+0, 0x1.be2f5a58ee9a4p-29),
> +    (double2)(0x1.039d880000000p+0, 0x1.8901f8f085fa7p-25),
> +    (double2)(0x1.03f0670000000p+0, 0x1.c68b8cd5b5d69p-26),
> +    (double2)(0x1.0443110000000p+0, 0x1.a6b0e8624be42p-26),
> +    (double2)(0x1.0495870000000p+0, 0x1.c4b22b06f68e7p-36),
> +    (double2)(0x1.04e7c80000000p+0, 0x1.0f3f0afcabe9bp-25),
> +    (double2)(0x1.0539d60000000p+0, 0x1.48495bca4e1b7p-26),
> +    (double2)(0x1.058bb00000000p+0, 0x1.6107f1abdfdc3p-25),
> +    (double2)(0x1.05dd570000000p+0, 0x1.e67261878288ap-25),
> +    (double2)(0x1.062ecc0000000p+0, 0x1.a6bc155286f1ep-26),
> +    (double2)(0x1.06800e0000000p+0, 0x1.8a759c64a85f2p-26),
> +    (double2)(0x1.06d11e0000000p+0, 0x1.5fce70a4a8d09p-27),
> +    (double2)(0x1.0721fc0000000p+0, 0x1.2f9cbf373fe1dp-28),
> +    (double2)(0x1.0772a80000000p+0, 0x1.90564ce4ac359p-26),
> +    (double2)(0x1.07c3230000000p+0, 0x1.ac29ce761b02fp-26),
> +    (double2)(0x1.08136d0000000p+0, 0x1.cb752f497381cp-26),
> +    (double2)(0x1.0863860000000p+0, 0x1.8bb9e1cfb35e0p-25),
> +    (double2)(0x1.08b36f0000000p+0, 0x1.5b4917099de90p-25),
> +    (double2)(0x1.0903280000000p+0, 0x1.cc77ac9c65ef2p-26),
> +    (double2)(0x1.0952b10000000p+0, 0x1.7a0f3e7be3dbap-26),
> +    (double2)(0x1.09a20a0000000p+0, 0x1.6ec851ee0c16fp-25),
> +    (double2)(0x1.09f1340000000p+0, 0x1.89449bf2946dap-25),
> +    (double2)(0x1.0a402f0000000p+0, 0x1.98f25301ba223p-25),
> +    (double2)(0x1.0a8efc0000000p+0, 0x1.47d5ec651f549p-28),
> +    (double2)(0x1.0add990000000p+0, 0x1.c33ec9a86007ap-25),
> +    (double2)(0x1.0b2c090000000p+0, 0x1.e0b6653e92649p-26),
> +    (double2)(0x1.0b7a4b0000000p+0, 0x1.bd64ac09d755fp-28),
> +    (double2)(0x1.0bc85f0000000p+0, 0x1.f537506f78167p-29),
> +    (double2)(0x1.0c16450000000p+0, 0x1.2c382d1b3735ep-25),
> +    (double2)(0x1.0c63fe0000000p+0, 0x1.e20ed659f99e1p-25),
> +    (double2)(0x1.0cb18b0000000p+0, 0x1.86b633a9c182ap-26),
> +    (double2)(0x1.0cfeeb0000000p+0, 0x1.45cfd5a65e777p-27),
> +    (double2)(0x1.0d4c1e0000000p+0, 0x1.0c8770f58bca4p-25),
> +    (double2)(0x1.0d99250000000p+0, 0x1.739e44b0933c5p-25),
> +    (double2)(0x1.0de6010000000p+0, 0x1.27dc3d9ce7bd8p-31),
> +    (double2)(0x1.0e32b00000000p+0, 0x1.3c53c7c5a7b64p-25),
> +    (double2)(0x1.0e7f340000000p+0, 0x1.9669683830cecp-25),
> +    (double2)(0x1.0ecb8d0000000p+0, 0x1.8d772c39bdcc4p-25),
> +    (double2)(0x1.0f17bb0000000p+0, 0x1.9b0008bcf6d7bp-25),
> +    (double2)(0x1.0f63bf0000000p+0, 0x1.bbb305825ce4fp-28),
> +    (double2)(0x1.0faf970000000p+0, 0x1.da3f4af13a406p-25),
> +    (double2)(0x1.0ffb460000000p+0, 0x1.f36b96f74ce86p-26),
> +    (double2)(0x1.1046cb0000000p+0, 0x1.65c002303f790p-30),
> +    (double2)(0x1.1092250000000p+0, 0x1.82f84095ba7d5p-25),
> +    (double2)(0x1.10dd560000000p+0, 0x1.d46433541b2c6p-25),
> +    (double2)(0x1.11285e0000000p+0, 0x1.71c3d56e93a89p-25),
> +    (double2)(0x1.11733d0000000p+0, 0x1.98dcef4e40012p-26),
> +    (double2)(0x1.11bdf30000000p+0, 0x1.530ebef17fe03p-27),
> +    (double2)(0x1.1208800000000p+0, 0x1.e8b8fa3715066p-27),
> +    (double2)(0x1.1252e40000000p+0, 0x1.ab26eb3b211dcp-25),
> +    (double2)(0x1.129d210000000p+0, 0x1.54dd4dc906307p-27),
> +    (double2)(0x1.12e7350000000p+0, 0x1.c9f962387984ep-26),
> +    (double2)(0x1.1331210000000p+0, 0x1.c62a959afec09p-25),
> +    (double2)(0x1.137ae60000000p+0, 0x1.638d9ac6a866ap-25),
> +    (double2)(0x1.13c4840000000p+0, 0x1.38704eca8a22dp-28),
> +    (double2)(0x1.140dfa0000000p+0, 0x1.e6c9e1db14f8fp-27),
> +    (double2)(0x1.1457490000000p+0, 0x1.8744b7f9c9eaap-26),
> +    (double2)(0x1.14a0710000000p+0, 0x1.6c2893486373bp-25),
> +    (double2)(0x1.14e9730000000p+0, 0x1.b36bce31699b7p-26),
> +    (double2)(0x1.15324e0000000p+0, 0x1.71e3813d200c7p-25),
> +    (double2)(0x1.157b030000000p+0, 0x1.99755ab40aa88p-25),
> +    (double2)(0x1.15c3920000000p+0, 0x1.b45ca0e4bcfc0p-25),
> +    (double2)(0x1.160bfc0000000p+0, 0x1.2dd090d869c5dp-28),
> +    (double2)(0x1.16543f0000000p+0, 0x1.4fe0516b917dap-25),
> +    (double2)(0x1.169c5d0000000p+0, 0x1.94563226317a2p-25),
> +    (double2)(0x1.16e4560000000p+0, 0x1.53d8fafc2c851p-25),
> +    (double2)(0x1.172c2a0000000p+0, 0x1.dcbd41fbd41a3p-26),
> +    (double2)(0x1.1773d90000000p+0, 0x1.862ff5285f59cp-26),
> +    (double2)(0x1.17bb630000000p+0, 0x1.3072ea97a1e1cp-25),
> +    (double2)(0x1.1802c90000000p+0, 0x1.2839075184805p-26),
> +    (double2)(0x1.184a0a0000000p+0, 0x1.4b0323e9eff42p-25),
> +    (double2)(0x1.1891270000000p+0, 0x1.b158893c45484p-25),
> +    (double2)(0x1.18d8210000000p+0, 0x1.149ef0fc35826p-28),
> +    (double2)(0x1.191ef60000000p+0, 0x1.f2e77ea96acaap-26),
> +    (double2)(0x1.1965a80000000p+0, 0x1.200074c471a95p-26),
> +    (double2)(0x1.19ac360000000p+0, 0x1.3f8cc517f6f04p-25),
> +    (double2)(0x1.19f2a10000000p+0, 0x1.60ba2e311bb55p-25),
> +    (double2)(0x1.1a38e90000000p+0, 0x1.4b788730bbec3p-25),
> +    (double2)(0x1.1a7f0e0000000p+0, 0x1.57090795ee20cp-25),
> +    (double2)(0x1.1ac5100000000p+0, 0x1.d9ffe983670b1p-25),
> +    (double2)(0x1.1b0af00000000p+0, 0x1.2a463ff61bfdap-25),
> +    (double2)(0x1.1b50ad0000000p+0, 0x1.9d1bc6a5e65cfp-25),
> +    (double2)(0x1.1b96480000000p+0, 0x1.8718abaa9e922p-25),
> +    (double2)(0x1.1bdbc10000000p+0, 0x1.3c2f52ffa342ep-25),
> +    (double2)(0x1.1c21180000000p+0, 0x1.0fae13ff42c80p-25),
> +    (double2)(0x1.1c664d0000000p+0, 0x1.5440f0ef00d57p-25),
> +    (double2)(0x1.1cab610000000p+0, 0x1.6fcd22d4e3c1ep-27),
> +    (double2)(0x1.1cf0530000000p+0, 0x1.e0c60b409e863p-27),
> +    (double2)(0x1.1d35230000000p+0, 0x1.f9cab5a5f0333p-25),
> +    (double2)(0x1.1d79d30000000p+0, 0x1.30f24744c333dp-25),
> +    (double2)(0x1.1dbe620000000p+0, 0x1.b50622a76b2fep-27),
> +    (double2)(0x1.1e02cf0000000p+0, 0x1.fdb94ba595375p-25),
> +    (double2)(0x1.1e471d0000000p+0, 0x1.861b9b945a171p-28),
> +    (double2)(0x1.1e8b490000000p+0, 0x1.54348015188c4p-25),
> +    (double2)(0x1.1ecf550000000p+0, 0x1.b54d149865523p-25),
> +    (double2)(0x1.1f13410000000p+0, 0x1.a0bb783d9de33p-25),
> +    (double2)(0x1.1f570d0000000p+0, 0x1.629d12b1a2157p-25),
> +    (double2)(0x1.1f9ab90000000p+0, 0x1.467fe35d179dfp-25),
> +    (double2)(0x1.1fde450000000p+0, 0x1.9763f3e26c8f7p-25),
> +    (double2)(0x1.2021b20000000p+0, 0x1.3f798bb9f7679p-26),
> +    (double2)(0x1.2064ff0000000p+0, 0x1.52e577e855898p-26),
> +    (double2)(0x1.20a82c0000000p+0, 0x1.fde47e5502c3ap-25),
> +    (double2)(0x1.20eb3b0000000p+0, 0x1.cbd0b548d96a0p-26),
> +    (double2)(0x1.212e2a0000000p+0, 0x1.a9cd9f7be8de8p-25),
> +    (double2)(0x1.2170fb0000000p+0, 0x1.22bbe704886dep-26),
> +    (double2)(0x1.21b3ac0000000p+0, 0x1.e3dea8317f020p-25),
> +    (double2)(0x1.21f63f0000000p+0, 0x1.e812085ac8855p-25),
> +    (double2)(0x1.2238b40000000p+0, 0x1.c87144f24cb07p-26),
> +    (double2)(0x1.227b0a0000000p+0, 0x1.1e128ee311fa2p-25),
> +    (double2)(0x1.22bd420000000p+0, 0x1.b5c163d61a2d3p-26),
> +    (double2)(0x1.22ff5c0000000p+0, 0x1.7d97e7fb90633p-27),
> +    (double2)(0x1.2341570000000p+0, 0x1.efe899d50f6a7p-25),
> +    (double2)(0x1.2383350000000p+0, 0x1.d0333eb75de5ap-25),
> +    (double2)(0x1.23c4f60000000p+0, 0x1.0e590be73a573p-27),
> +    (double2)(0x1.2406980000000p+0, 0x1.8ce8dcac3cdd2p-25),
> +    (double2)(0x1.24481d0000000p+0, 0x1.ee8a48954064bp-25),
> +    (double2)(0x1.2489850000000p+0, 0x1.aa62f18461e09p-25),
> +    (double2)(0x1.24cad00000000p+0, 0x1.01e5940986a15p-25),
> +    (double2)(0x1.250bfe0000000p+0, 0x1.b082f4f9b8d4cp-28),
> +    (double2)(0x1.254d0e0000000p+0, 0x1.876e0e5527f5ap-25),
> +    (double2)(0x1.258e020000000p+0, 0x1.3617080831e6bp-25),
> +    (double2)(0x1.25ced90000000p+0, 0x1.81b26e34aa4a2p-25),
> +    (double2)(0x1.260f940000000p+0, 0x1.52ee66dfab0c1p-26),
> +    (double2)(0x1.2650320000000p+0, 0x1.d85a5329e8819p-26),
> +    (double2)(0x1.2690b40000000p+0, 0x1.105c1b646b5d1p-26),
> +    (double2)(0x1.26d1190000000p+0, 0x1.bb6690c1a379cp-25),
> +    (double2)(0x1.2711630000000p+0, 0x1.86aeba73ce3a9p-26),
> +    (double2)(0x1.2751900000000p+0, 0x1.dd16198294dd4p-25),
> +    (double2)(0x1.2791a20000000p+0, 0x1.454e675775e83p-25),
> +    (double2)(0x1.27d1980000000p+0, 0x1.3842e026197eap-25),
> +    (double2)(0x1.2811720000000p+0, 0x1.f1ce0e70c44d2p-25),
> +    (double2)(0x1.2851310000000p+0, 0x1.ad636441a5627p-25),
> +    (double2)(0x1.2890d50000000p+0, 0x1.4c205d7212abbp-26),
> +    (double2)(0x1.28d05d0000000p+0, 0x1.167c86c116419p-25),
> +    (double2)(0x1.290fca0000000p+0, 0x1.38ec3ef16e294p-25),
> +    (double2)(0x1.294f1c0000000p+0, 0x1.473fceace9321p-25),
> +    (double2)(0x1.298e530000000p+0, 0x1.7af53a836dba7p-25),
> +    (double2)(0x1.29cd700000000p+0, 0x1.a51f3c383b652p-30),
> +    (double2)(0x1.2a0c710000000p+0, 0x1.3696da190822dp-25),
> +    (double2)(0x1.2a4b580000000p+0, 0x1.2f9adec77074bp-25),
> +    (double2)(0x1.2a8a250000000p+0, 0x1.8190fd5bee55fp-28),
> +    (double2)(0x1.2ac8d70000000p+0, 0x1.bfee8fac68e55p-27),
> +    (double2)(0x1.2b076f0000000p+0, 0x1.31c9d6bc5f68ap-28),
> +    (double2)(0x1.2b45ec0000000p+0, 0x1.89d0523737edfp-25),
> +    (double2)(0x1.2b84500000000p+0, 0x1.a295943bf47bbp-26),
> +    (double2)(0x1.2bc29a0000000p+0, 0x1.96be32e5b3207p-28),
> +    (double2)(0x1.2c00c90000000p+0, 0x1.e44c7d909fa0ep-25),
> +    (double2)(0x1.2c3ee00000000p+0, 0x1.b2505da94d9eap-29),
> +    (double2)(0x1.2c7cdc0000000p+0, 0x1.0c851f46c9c98p-25),
> +    (double2)(0x1.2cbabf0000000p+0, 0x1.da71f7d9aa3b7p-26),
> +    (double2)(0x1.2cf8880000000p+0, 0x1.f1b605d019ef1p-25),
> +    (double2)(0x1.2d36390000000p+0, 0x1.386e8a2189563p-27),
> +    (double2)(0x1.2d73d00000000p+0, 0x1.b19fa5d306ba7p-28),
> +    (double2)(0x1.2db14d0000000p+0, 0x1.dd749b67aef76p-25),
> +    (double2)(0x1.2deeb20000000p+0, 0x1.76ff6f1dc04b0p-25),
> +    (double2)(0x1.2e2bfe0000000p+0, 0x1.35a33d0b232a6p-25),
> +    (double2)(0x1.2e69310000000p+0, 0x1.4bdc80024a4e1p-25),
> +    (double2)(0x1.2ea64b0000000p+0, 0x1.ebd61770fd723p-25),
> +    (double2)(0x1.2ee34d0000000p+0, 0x1.4769fc537264dp-25),
> +    (double2)(0x1.2f20360000000p+0, 0x1.9021f429f3b98p-25),
> +    (double2)(0x1.2f5d070000000p+0, 0x1.ee7083efbd606p-26),
> +    (double2)(0x1.2f99bf0000000p+0, 0x1.ad985552a6b1ap-25),
> +    (double2)(0x1.2fd65f0000000p+0, 0x1.e3df778772160p-25),
> +    (double2)(0x1.3012e70000000p+0, 0x1.ca5d76ddc9b34p-25),
> +    (double2)(0x1.304f570000000p+0, 0x1.91154ffdbaf74p-25),
> +    (double2)(0x1.308baf0000000p+0, 0x1.67bdd57fb306ap-25),
> +    (double2)(0x1.30c7ef0000000p+0, 0x1.7dc255ac40886p-25),
> +    (double2)(0x1.3104180000000p+0, 0x1.219f38e8afafep-32),
> +    (double2)(0x1.3140280000000p+0, 0x1.2416bf9669a04p-25),
> +    (double2)(0x1.317c210000000p+0, 0x1.11c96b2b3987fp-25),
> +    (double2)(0x1.31b8020000000p+0, 0x1.f99ed447e1177p-25),
> +    (double2)(0x1.31f3cd0000000p+0, 0x1.3245826328a11p-30),
> +    (double2)(0x1.322f7f0000000p+0, 0x1.6f56dd1e645f8p-25),
> +    (double2)(0x1.326b1b0000000p+0, 0x1.6164946945535p-27),
> +    (double2)(0x1.32a69f0000000p+0, 0x1.e37d59d190028p-26),
> +    (double2)(0x1.32e20c0000000p+0, 0x1.68671f12bf828p-25),
> +    (double2)(0x1.331d620000000p+0, 0x1.e8ecbca6aabbdp-25),
> +    (double2)(0x1.3358a20000000p+0, 0x1.3f49e109a5912p-26),
> +    (double2)(0x1.3393ca0000000p+0, 0x1.b8a0e11ec3043p-25),
> +    (double2)(0x1.33cedc0000000p+0, 0x1.5fae00aed691ap-25),
> +    (double2)(0x1.3409d70000000p+0, 0x1.c0569bece3e4ap-25),
> +    (double2)(0x1.3444bc0000000p+0, 0x1.05e26744efbfep-25),
> +    (double2)(0x1.347f8a0000000p+0, 0x1.5b570a94be5c5p-25),
> +    (double2)(0x1.34ba420000000p+0, 0x1.d6f156ea0e063p-26),
> +    (double2)(0x1.34f4e30000000p+0, 0x1.e0ca7612fc484p-25),
> +    (double2)(0x1.352f6f0000000p+0, 0x1.963c927b25258p-27),
> +    (double2)(0x1.3569e40000000p+0, 0x1.47930aa725a5cp-26),
> +    (double2)(0x1.35a4430000000p+0, 0x1.8a79fe3af43b3p-26),
> +    (double2)(0x1.35de8c0000000p+0, 0x1.e6dc29c41bdafp-26),
> +    (double2)(0x1.3618bf0000000p+0, 0x1.57a2e76f863a5p-25),
> +    (double2)(0x1.3652dd0000000p+0, 0x1.ae3b61716354dp-29),
> +    (double2)(0x1.368ce40000000p+0, 0x1.65fb5df6906b1p-25),
> +    (double2)(0x1.36c6d60000000p+0, 0x1.6177d7f588f7bp-25),
> +    (double2)(0x1.3700b30000000p+0, 0x1.ad55abd091b67p-28),
> +    (double2)(0x1.373a7a0000000p+0, 0x1.55337b2422d76p-30),
> +    (double2)(0x1.37742b0000000p+0, 0x1.084ebe86972d5p-25),
> +    (double2)(0x1.37adc70000000p+0, 0x1.56395808e1ea3p-25),
> +    (double2)(0x1.37e74e0000000p+0, 0x1.1bce21b40fba7p-25),
> +    (double2)(0x1.3820c00000000p+0, 0x1.006f94605b515p-26),
> +    (double2)(0x1.385a1c0000000p+0, 0x1.aa676aceb1f7dp-25),
> +    (double2)(0x1.3893640000000p+0, 0x1.8229f76554ce6p-26),
> +    (double2)(0x1.38cc960000000p+0, 0x1.eabfc6cf57330p-25),
> +    (double2)(0x1.3905b40000000p+0, 0x1.4daed9c0ce8bcp-25),
> +    (double2)(0x1.393ebd0000000p+0, 0x1.0ff1768237141p-25),
> +    (double2)(0x1.3977b10000000p+0, 0x1.575f83051b085p-25),
> +    (double2)(0x1.39b0910000000p+0, 0x1.2667deb523e29p-27),
> +    (double2)(0x1.39e95c0000000p+0, 0x1.816996954f4fdp-30),
> +    (double2)(0x1.3a22120000000p+0, 0x1.87cfccf4d9cd4p-26),
> +    (double2)(0x1.3a5ab40000000p+0, 0x1.2c5d018198353p-26),
> +    (double2)(0x1.3a93410000000p+0, 0x1.a7a898dcc34aap-25),
> +    (double2)(0x1.3acbbb0000000p+0, 0x1.cead6dadc36d1p-29),
> +    (double2)(0x1.3b04200000000p+0, 0x1.a55759c498bdfp-29),
> +    (double2)(0x1.3b3c700000000p+0, 0x1.c414a9ef6de04p-25),
> +    (double2)(0x1.3b74ad0000000p+0, 0x1.3e2108a6e58fap-25),
> +    (double2)(0x1.3bacd60000000p+0, 0x1.587fd7643d77cp-26),
> +    (double2)(0x1.3be4eb0000000p+0, 0x1.901eb1d3ff3dfp-28),
> +    (double2)(0x1.3c1ceb0000000p+0, 0x1.f2ccd7c812fc6p-25),
> +    (double2)(0x1.3c54d90000000p+0, 0x1.1c8ee70a01049p-29),
> +    (double2)(0x1.3c8cb20000000p+0, 0x1.63e8d02831eecp-26),
> +    (double2)(0x1.3cc4770000000p+0, 0x1.f61a42a92c7ffp-25),
> +    (double2)(0x1.3cfc2a0000000p+0, 0x1.a917399c84d24p-34),
> +    (double2)(0x1.3d33c80000000p+0, 0x1.e9197c8eec2f0p-26),
> +    (double2)(0x1.3d6b530000000p+0, 0x1.e6f842f5a1378p-26),
> +    (double2)(0x1.3da2cb0000000p+0, 0x1.fac242a90a0fcp-29),
> +    (double2)(0x1.3dda2f0000000p+0, 0x1.35ed726610227p-26),
> +    (double2)(0x1.3e11800000000p+0, 0x1.0e0d64804b15bp-26),
> +    (double2)(0x1.3e48be0000000p+0, 0x1.560675daba814p-31),
> +    (double2)(0x1.3e7fe80000000p+0, 0x1.37388c8768032p-25),
> +    (double2)(0x1.3eb7000000000p+0, 0x1.ee3c89f9e01f5p-28),
> +    (double2)(0x1.3eee040000000p+0, 0x1.39f6f0d09747cp-25),
> +    (double2)(0x1.3f24f60000000p+0, 0x1.322c327abb8f0p-27),
> +    (double2)(0x1.3f5bd40000000p+0, 0x1.961b347c8ac80p-25),
> +    (double2)(0x1.3f92a00000000p+0, 0x1.3711fbbd0f118p-25),
> +    (double2)(0x1.3fc9590000000p+0, 0x1.4fad8d7718ffbp-25),
> +    (double2)(0x1.3fffff0000000p+0, 0x1.fffffffffffffp-25),
> +    (double2)(0x1.4036930000000p+0, 0x1.67efa79ec35b4p-25),
> +    (double2)(0x1.406d140000000p+0, 0x1.a737687a254a8p-25),
> +    (double2)(0x1.40a3830000000p+0, 0x1.bace0f87d924dp-26),
> +    (double2)(0x1.40d9df0000000p+0, 0x1.29e37c237e392p-25),
> +    (double2)(0x1.4110290000000p+0, 0x1.57ce7ac3f3012p-26),
> +    (double2)(0x1.4146600000000p+0, 0x1.82829359f8fbdp-25),
> +    (double2)(0x1.417c850000000p+0, 0x1.cc9be42d14676p-25),
> +    (double2)(0x1.41b2980000000p+0, 0x1.a8f001c137d0bp-25),
> +    (double2)(0x1.41e8990000000p+0, 0x1.36127687dda05p-25),
> +    (double2)(0x1.421e880000000p+0, 0x1.24dba322646f0p-26),
> +    (double2)(0x1.4254640000000p+0, 0x1.dc43f1ed210b4p-25),
> +    (double2)(0x1.428a2f0000000p+0, 0x1.31ae515c447bbp-25)
> +};
> +
> +
> +DECLARE_TABLE(double2, CBRT_REM_TBL, 5) = {
> +    (double2)(0x1.428a2f0000000p-1, 0x1.31ae515c447bbp-26),
> +    (double2)(0x1.965fea0000000p-1, 0x1.4f5b8f20ac166p-27),
> +    (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
> +    (double2)(0x1.428a2f0000000p+0, 0x1.31ae515c447bbp-25),
> +    (double2)(0x1.965fea0000000p+0, 0x1.4f5b8f20ac166p-26),
> +};
> +
>  TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
>  TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl);
>  TABLE_FUNCTION(double2, SINH_TBL, sinh_tbl);
>  TABLE_FUNCTION(double2, COSH_TBL, cosh_tbl);
> +TABLE_FUNCTION(double, CBRT_INV_TBL, cbrt_inv_tbl);
> +TABLE_FUNCTION(double2, CBRT_DBL_TBL, cbrt_dbl_tbl);
> +TABLE_FUNCTION(double2, CBRT_REM_TBL, cbrt_rem_tbl);
>
>  #endif // cl_khr_fp64
> diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
> index bee3906..7fd16a0 100644
> --- a/generic/lib/math/tables.h
> +++ b/generic/lib/math/tables.h
> @@ -42,6 +42,7 @@ TABLE_FUNCTION_DECL(float2, loge_tbl);
>  TABLE_FUNCTION_DECL(float, log_inv_tbl);
>  TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
>  TABLE_FUNCTION_DECL(float2, sinhcosh_tbl);
> +TABLE_FUNCTION_DECL(float2, cbrt_tbl);
>
>  #ifdef cl_khr_fp64
>
> @@ -52,5 +53,8 @@ TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
>  TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
>  TABLE_FUNCTION_DECL(double2, sinh_tbl);
>  TABLE_FUNCTION_DECL(double2, cosh_tbl);
> +TABLE_FUNCTION_DECL(double, cbrt_inv_tbl);
> +TABLE_FUNCTION_DECL(double2, cbrt_dbl_tbl);
> +TABLE_FUNCTION_DECL(double2, cbrt_rem_tbl);
>
>  #endif // cl_khr_fp64
> --
> 2.0.4
>
>
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20160202/f5db903b/attachment.html>


More information about the Libclc-dev mailing list