[Libclc-dev] [PATCH 12/15] Implement exp2 using OpenCL C rather than using an intrinsic

Fri Apr 10 16:00:34 PDT 2015

On Tue, 2015-04-07 at 18:05 +0000, Tom Stellard wrote:
> Not all targets support the intrinsic, so it's better to have a
> generic implementation which does not use it.

LGTM.

Which targets are those? I found exp_ieee in the manuals (r600-NI) and
v_exp_f32 in SI.
Also, related to 5/15 (and possibly other functions): is there a way to
specify implementations per chip class, or does that need some extra
infrastructure work?

jan

> 
> This exp2 implementation was ported from the AMD builtin library
> and has been tested with piglit, OpenCV, and the ocl conformance tests.
> ---
>  generic/include/clc/math/exp2.h   | 28 ++++++++++---
>  generic/include/clc/math/exp2.inc | 23 +++++++++++
>  generic/lib/SOURCES               |  2 +
>  generic/lib/math/exp2.cl          | 86 +++++++++++++++++++++++++++++++++++++++
>  generic/lib/math/exp_helper.cl    | 69 +++++++++++++++++++++++++++++++
>  generic/lib/math/exp_helper.h     | 29 +++++++++++++
>  generic/lib/math/tables.cl        | 70 +++++++++++++++++++++++++++++++
>  generic/lib/math/tables.h         |  2 +-
>  8 files changed, 303 insertions(+), 6 deletions(-)
>  create mode 100644 generic/include/clc/math/exp2.inc
>  create mode 100644 generic/lib/math/exp2.cl
>  create mode 100644 generic/lib/math/exp_helper.cl
>  create mode 100644 generic/lib/math/exp_helper.h
> 
> diff --git a/generic/include/clc/math/exp2.h b/generic/include/clc/math/exp2.h
> index ec0dad2..14167e8 100644
> --- a/generic/include/clc/math/exp2.h
> +++ b/generic/include/clc/math/exp2.h
> @@ -1,6 +1,24 @@
> -#undef exp2
> -#define exp2 __clc_exp2
> +/*
> + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
>  
> -#define __CLC_FUNCTION __clc_exp2
> -#define __CLC_INTRINSIC "llvm.exp2"
> -#include <clc/math/unary_intrin.inc>
> +#define __CLC_BODY <clc/math/exp2.inc>
> +#include <clc/math/gentype.inc>
> diff --git a/generic/include/clc/math/exp2.inc b/generic/include/clc/math/exp2.inc
> new file mode 100644
> index 0000000..3ecaae6
> --- /dev/null
> +++ b/generic/include/clc/math/exp2.inc
> @@ -0,0 +1,23 @@
> +/*
> + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE exp2(__CLC_GENTYPE x);
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 0e8c7d9..9b0986c 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -79,6 +79,8 @@ math/cospi.cl
>  math/ep_log.cl
>  math/erfc.cl
>  math/exp.cl
> +math/exp_helper.cl
> +math/exp2.cl
>  math/exp10.cl
>  math/fmax.cl
>  math/fmin.cl
> diff --git a/generic/lib/math/exp2.cl b/generic/lib/math/exp2.cl
> new file mode 100644
> index 0000000..1ddccbd
> --- /dev/null
> +++ b/generic/lib/math/exp2.cl
> @@ -0,0 +1,86 @@
> +/*
> + * Copyright (c) 2014,2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include <clc/clc.h>
> +
> +#include "math.h"
> +#include "../clcmacro.h"
> +
> +_CLC_OVERLOAD _CLC_DEF float exp2(float x) {
> +
> +    // Reduce x
> +    const float ln2HI = 0x1.62e300p-1f;
> +    const float ln2LO = 0x1.2fefa2p-17f;
> +
> +    float t = rint(x);
> +    int p = (int)t;
> +    float tt = x - t;
> +    float hi = tt * ln2HI;
> +    float lo = tt * ln2LO;
> +
> +    // Evaluate poly
> +    t = hi + lo;
> +    tt  = t*t;
> +    float v = mad(tt,
> +                  -mad(tt,
> +		       mad(tt,
> +		           mad(tt,
> +			       mad(tt, 0x1.637698p-25f, -0x1.bbd41cp-20f),
> +                               0x1.1566aap-14f),
> +                           -0x1.6c16c2p-9f),
> +                       0x1.555556p-3f),
> +                  t);
> +
> +    float y = 1.0f - (((-lo) - MATH_DIVIDE(t * v, 2.0f - v)) - hi);
> +
> +    // Scale by 2^p
> +    float r =  as_float(as_int(y) + (p << 23));
> +
> +    const float ulim =  128.0f;
> +    const float llim = -126.0f;
> +
> +    r = x < llim ? 0.0f : r;
> +    r = x < ulim ? r : as_float(0x7f800000);
> +    return isnan(x) ? x : r;
> +}
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, exp2, float)
> +
> +#ifdef cl_khr_fp64
> +
> +#include "exp_helper.h"
> +
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +
> +_CLC_OVERLOAD _CLC_DEF double exp2(double x) {
> +    const double R_LN2 = 0x1.62e42fefa39efp-1; // ln(2)
> +    const double R_1_BY_64 = 1.0 / 64.0;
> +
> +    int n = convert_int(x * 64.0);
> +    double r = R_LN2 * fma(-R_1_BY_64, (double)n, x); 
> +    return __clc_exp_helper(x, -1074.0, 1024.0, r, n);
> +}
> +
> +
> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, exp2, double)
> +
> +#endif
> diff --git a/generic/lib/math/exp_helper.cl b/generic/lib/math/exp_helper.cl
> new file mode 100644
> index 0000000..046f306
> --- /dev/null
> +++ b/generic/lib/math/exp_helper.cl
> @@ -0,0 +1,69 @@
> +/*
> + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include <clc/clc.h>
> +
> +#include "math.h"
> +#include "tables.h"
> +
> +#ifdef cl_khr_fp64
> +
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +
> +_CLC_DEF double __clc_exp_helper(double x, double x_min, double x_max, double r, int n) {
> +
> +    int j = n & 0x3f;
> +    int m = n >> 6;
> +
> +    // 6 term tail of Taylor expansion of e^r
> +    double z2 = r * fma(r,
> +	                fma(r,
> +		            fma(r,
> +			        fma(r,
> +			            fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
> +			            0x1.5555555555555p-5),
> +			        0x1.5555555555555p-3),
> +		            0x1.0000000000000p-1),
> +		        1.0);
> +
> +    double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
> +    z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
> +
> +    int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
> +
> +    int n1 = m >> 2;
> +    int n2 = m-n1;
> +    double z3= z2 * as_double(((long)n1 + 1023) << 52);
> +    z3 *= as_double(((long)n2 + 1023) << 52);
> +
> +    z2 = ldexp(z2, m);
> +    z2 = small_value ? z3: z2;
> +
> +    z2 = isnan(x) ? x : z2;
> +
> +    z2 = x > x_max ? as_double(PINFBITPATT_DP64) : z2;
> +    z2 = x < x_min ? 0.0 : z2;
> +
> +    return z2;
> +}
> +
> +#endif // cl_khr_fp64
> diff --git a/generic/lib/math/exp_helper.h b/generic/lib/math/exp_helper.h
> new file mode 100644
> index 0000000..e6df2fd
> --- /dev/null
> +++ b/generic/lib/math/exp_helper.h
> @@ -0,0 +1,29 @@
> +/*
> + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +
> +#ifdef cl_khr_fp64
> +
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +_CLC_DECL double __clc_exp_helper(double x, double x_min, double x_max, double r, int n);
> +
> +#endif
> diff --git a/generic/lib/math/tables.cl b/generic/lib/math/tables.cl
> index 090e64a..5a620ec 100644
> --- a/generic/lib/math/tables.cl
> +++ b/generic/lib/math/tables.cl
> @@ -634,6 +634,76 @@ DECLARE_TABLE(double2, ATAN_JBY256_TBL, 241) = {
>      (double2)(0x1.921fb00000000p-1, 0x1.5110b4611a626p-23),
>  };
>  
> +DECLARE_TABLE(double2, TWO_TO_JBY64_EP, 64) = {
> +    (double2)(0x1.0000000000000p+0, 0x0.0000000000000p+0),
> +    (double2)(0x1.02c9a30000000p+0, 0x1.cef00c1dcdef9p-25),
> +    (double2)(0x1.059b0d0000000p+0, 0x1.8ac2ba1d73e2ap-27),
> +    (double2)(0x1.0874510000000p+0, 0x1.0eb37901186bep-25),
> +    (double2)(0x1.0b55860000000p+0, 0x1.9f3121ec53172p-25),
> +    (double2)(0x1.0e3ec30000000p+0, 0x1.69e8d10103a17p-27),
> +    (double2)(0x1.11301d0000000p+0, 0x1.25b50a4ebbf1ap-32),
> +    (double2)(0x1.1429aa0000000p+0, 0x1.d525bbf668203p-25),
> +    (double2)(0x1.172b830000000p+0, 0x1.8faa2f5b9bef9p-25),
> +    (double2)(0x1.1a35be0000000p+0, 0x1.6df96ea796d31p-25),
> +    (double2)(0x1.1d48730000000p+0, 0x1.68b9aa7805b80p-28),
> +    (double2)(0x1.2063b80000000p+0, 0x1.0c519ac771dd6p-25),
> +    (double2)(0x1.2387a60000000p+0, 0x1.ceac470cd83f5p-25),
> +    (double2)(0x1.26b4560000000p+0, 0x1.789f37495e99cp-26),
> +    (double2)(0x1.29e9df0000000p+0, 0x1.47f7b84b09745p-26),
> +    (double2)(0x1.2d285a0000000p+0, 0x1.b900c2d002475p-26),
> +    (double2)(0x1.306fe00000000p+0, 0x1.4636e2a5bd1abp-25),
> +    (double2)(0x1.33c08b0000000p+0, 0x1.320b7fa64e430p-27),
> +    (double2)(0x1.371a730000000p+0, 0x1.ceaa72a9c5154p-26),
> +    (double2)(0x1.3a7db30000000p+0, 0x1.3967fdba86f24p-26),
> +    (double2)(0x1.3dea640000000p+0, 0x1.82468446b6824p-25),
> +    (double2)(0x1.4160a20000000p+0, 0x1.f72e29f84325bp-28),
> +    (double2)(0x1.44e0860000000p+0, 0x1.8624b40c4dbd0p-30),
> +    (double2)(0x1.486a2b0000000p+0, 0x1.704f3404f068ep-26),
> +    (double2)(0x1.4bfdad0000000p+0, 0x1.4d8a89c750e5ep-26),
> +    (double2)(0x1.4f9b270000000p+0, 0x1.a74b29ab4cf62p-26),
> +    (double2)(0x1.5342b50000000p+0, 0x1.a753e077c2a0fp-26),
> +    (double2)(0x1.56f4730000000p+0, 0x1.ad49f699bb2c0p-26),
> +    (double2)(0x1.5ab07d0000000p+0, 0x1.a90a852b19260p-25),
> +    (double2)(0x1.5e76f10000000p+0, 0x1.6b48521ba6f93p-26),
> +    (double2)(0x1.6247eb0000000p+0, 0x1.d2ac258f87d03p-31),
> +    (double2)(0x1.6623880000000p+0, 0x1.2a91124893ecfp-27),
> +    (double2)(0x1.6a09e60000000p+0, 0x1.9fcef32422cbep-26),
> +    (double2)(0x1.6dfb230000000p+0, 0x1.8ca345de441c5p-25),
> +    (double2)(0x1.71f75e0000000p+0, 0x1.1d8bee7ba46e1p-25),
> +    (double2)(0x1.75feb50000000p+0, 0x1.9099f22fdba6ap-26),
> +    (double2)(0x1.7a11470000000p+0, 0x1.f580c36bea881p-27),
> +    (double2)(0x1.7e2f330000000p+0, 0x1.b3d398841740ap-26),
> +    (double2)(0x1.8258990000000p+0, 0x1.2999c25159f11p-25),
> +    (double2)(0x1.868d990000000p+0, 0x1.68925d901c83bp-25),
> +    (double2)(0x1.8ace540000000p+0, 0x1.15506dadd3e2ap-27),
> +    (double2)(0x1.8f1ae90000000p+0, 0x1.22aee6c57304ep-25),
> +    (double2)(0x1.93737b0000000p+0, 0x1.9b8bc9e8a0387p-29),
> +    (double2)(0x1.97d8290000000p+0, 0x1.fbc9c9f173d24p-25),
> +    (double2)(0x1.9c49180000000p+0, 0x1.51f8480e3e235p-27),
> +    (double2)(0x1.a0c6670000000p+0, 0x1.6bbcac96535b5p-25),
> +    (double2)(0x1.a5503b0000000p+0, 0x1.1f12ae45a1224p-27),
> +    (double2)(0x1.a9e6b50000000p+0, 0x1.5e7f6fd0fac90p-26),
> +    (double2)(0x1.ae89f90000000p+0, 0x1.2b5a75abd0e69p-25),
> +    (double2)(0x1.b33a2b0000000p+0, 0x1.09e2bf5ed7fa1p-25),
> +    (double2)(0x1.b7f76f0000000p+0, 0x1.7daf237553d84p-27),
> +    (double2)(0x1.bcc1e90000000p+0, 0x1.2f074891ee83dp-30),
> +    (double2)(0x1.c199bd0000000p+0, 0x1.b0aa538444196p-25),
> +    (double2)(0x1.c67f120000000p+0, 0x1.cafa29694426fp-25),
> +    (double2)(0x1.cb720d0000000p+0, 0x1.9df20d22a0797p-25),
> +    (double2)(0x1.d072d40000000p+0, 0x1.40f12f71a1e45p-25),
> +    (double2)(0x1.d5818d0000000p+0, 0x1.9f7490e4bb40bp-25),
> +    (double2)(0x1.da9e600000000p+0, 0x1.ed9942b84600dp-27),
> +    (double2)(0x1.dfc9730000000p+0, 0x1.bdcdaf5cb4656p-27),
> +    (double2)(0x1.e502ee0000000p+0, 0x1.e2cffd89cf44cp-26),
> +    (double2)(0x1.ea4afa0000000p+0, 0x1.52486cc2c7b9dp-27),
> +    (double2)(0x1.efa1be0000000p+0, 0x1.cc2b44eee3fa4p-25),
> +    (double2)(0x1.f507650000000p+0, 0x1.6dc8a80ce9f09p-25),
> +    (double2)(0x1.fa7c180000000p+0, 0x1.9e90d82e90a7ep-28)
> +
> +};
> +
> +
>  TABLE_FUNCTION(double2, ATAN_JBY256_TBL, atan_jby256_tbl);
> +TABLE_FUNCTION(double2, TWO_TO_JBY64_EP, two_to_jby64_ep_tbl);
>  
>  #endif // cl_khr_fp64
> diff --git a/generic/lib/math/tables.h b/generic/lib/math/tables.h
> index d09adf1..55ff853 100644
> --- a/generic/lib/math/tables.h
> +++ b/generic/lib/math/tables.h
> @@ -48,5 +48,5 @@ TABLE_FUNCTION_DECL(uint4,  pibits_tbl);
>  
>  TABLE_FUNCTION_DECL(double2, ln_tbl);
>  TABLE_FUNCTION_DECL(double2, atan_jby256_tbl);
> -
> +TABLE_FUNCTION_DECL(double2, two_to_jby64_ep_tbl);
>  #endif // cl_khr_fp64

-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20150410/bd01ea92/attachment.sig>