[Libclc-dev] [PATCH] math: Add frexp ported from amd-builtins

Fri Feb 5 11:06:52 PST 2016

On Mon, 2016-02-01 at 22:47 -0600, Aaron Watry via Libclc-dev wrote:
> The float implementation is almost a direct port from the amd-
> builtins,
> but instead of just having a scalar and float4 implementation, it has
> a scalar and arbitrary width vector implementation.
> 
> The double scalar is also a direct port from AMD's builtin release.
> 
> The double vector implementation copies the logic in the float vector
> implementation using the values from the double scalar version.
> 
> Both have been tested in piglit using tests just sent to that
> project's
> mailing list.
> 
> Signed-off-by: Aaron Watry <awatry at gmail.com>

LGTM. Maybe add a newline at the end of generic/lib/math/frexp.inc.

Reviewed-by: Jan Vesely <jan.vesely at rutgers.edu>

Jan

> ---
>  generic/include/clc/clc.h            |   1 +
>  generic/include/clc/math/frexp.h     |   2 +
>  generic/include/clc/math/frexp.inc   |   3 +
>  generic/include/clc/math/gentype.inc |  24 ++++++++
>  generic/lib/SOURCES                  |   1 +
>  generic/lib/math/frexp.cl            |  10 ++++
>  generic/lib/math/frexp.inc           | 110
> +++++++++++++++++++++++++++++++++++
>  7 files changed, 151 insertions(+)
>  create mode 100644 generic/include/clc/math/frexp.h
>  create mode 100644 generic/include/clc/math/frexp.inc
>  create mode 100644 generic/lib/math/frexp.cl
>  create mode 100644 generic/lib/math/frexp.inc
> 
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index bd2f67f..4060ea1 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -58,6 +58,7 @@
>  #include <clc/math/fmin.h>
>  #include <clc/math/fmod.h>
>  #include <clc/math/fract.h>
> +#include <clc/math/frexp.h>
>  #include <clc/math/half_rsqrt.h>
>  #include <clc/math/half_sqrt.h>
>  #include <clc/math/hypot.h>
> diff --git a/generic/include/clc/math/frexp.h
> b/generic/include/clc/math/frexp.h
> new file mode 100644
> index 0000000..dda23da
> --- /dev/null
> +++ b/generic/include/clc/math/frexp.h
> @@ -0,0 +1,2 @@
> +#define __CLC_BODY <clc/math/frexp.inc>
> +#include <clc/math/gentype.inc>
> diff --git a/generic/include/clc/math/frexp.inc
> b/generic/include/clc/math/frexp.inc
> new file mode 100644
> index 0000000..2a6f7f5
> --- /dev/null
> +++ b/generic/include/clc/math/frexp.inc
> @@ -0,0 +1,3 @@
> +/* frexp overloads, one per address space of the exponent out-pointer (global, local, private). */
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global
> __CLC_INTN *iptr);
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local
> __CLC_INTN *iptr);
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private
> __CLC_INTN *iptr);
> diff --git a/generic/include/clc/math/gentype.inc
> b/generic/include/clc/math/gentype.inc
> index 41eb411..e6ffad1 100644
> --- a/generic/include/clc/math/gentype.inc
> +++ b/generic/include/clc/math/gentype.inc
> @@ -2,38 +2,50 @@
>  #define __CLC_FPSIZE 32
>  
>  #define __CLC_GENTYPE float
> +#define __CLC_INTN int
>  #define __CLC_SCALAR
>  #include __CLC_BODY
>  #undef __CLC_GENTYPE
> +#undef __CLC_INTN
>  #undef __CLC_SCALAR
>  
>  #define __CLC_GENTYPE float2
>  #define __CLC_INTN int2
> +#define __CLC_VECSIZE 2
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE float3
>  #define __CLC_INTN int3
> +#define __CLC_VECSIZE 3
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE float4
>  #define __CLC_INTN int4
> +#define __CLC_VECSIZE 4
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE float8
>  #define __CLC_INTN int8
> +#define __CLC_VECSIZE 8
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE float16
>  #define __CLC_INTN int16
> +#define __CLC_VECSIZE 16
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
> @@ -47,37 +59,49 @@
>  
>  #define __CLC_SCALAR
>  #define __CLC_GENTYPE double
> +#define __CLC_INTN int
>  #include __CLC_BODY
>  #undef __CLC_GENTYPE
> +#undef __CLC_INTN
>  #undef __CLC_SCALAR
>  
>  #define __CLC_GENTYPE double2
>  #define __CLC_INTN int2
> +#define __CLC_VECSIZE 2
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE double3
>  #define __CLC_INTN int3
> +#define __CLC_VECSIZE 3
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE double4
>  #define __CLC_INTN int4
> +#define __CLC_VECSIZE 4
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE double8
>  #define __CLC_INTN int8
> +#define __CLC_VECSIZE 8
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
>  #define __CLC_GENTYPE double16
>  #define __CLC_INTN int16
> +#define __CLC_VECSIZE 16
>  #include __CLC_BODY
> +#undef __CLC_VECSIZE
>  #undef __CLC_GENTYPE
>  #undef __CLC_INTN
>  
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 1daae8d..c3a5a8a 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -86,6 +86,7 @@ math/fmax.cl
>  math/fmin.cl
>  math/fmod.cl
>  math/fract.cl
> +math/frexp.cl
>  math/half_rsqrt.cl
>  math/half_sqrt.cl
>  math/hypot.cl
> diff --git a/generic/lib/math/frexp.cl b/generic/lib/math/frexp.cl
> new file mode 100644
> index 0000000..acd5d93
> --- /dev/null
> +++ b/generic/lib/math/frexp.cl
> @@ -0,0 +1,10 @@
> +#include <clc/clc.h>
> +
> +#include "math.h"
> +
> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +#endif
> +
> +#define __CLC_BODY <frexp.inc>
> +#include <clc/math/gentype.inc>
> diff --git a/generic/lib/math/frexp.inc b/generic/lib/math/frexp.inc
> new file mode 100644
> index 0000000..2676993
> --- /dev/null
> +++ b/generic/lib/math/frexp.inc
> @@ -0,0 +1,110 @@
> +/*
> + * Copyright (c) 2014 Advanced Micro Devices, Inc.
> + * Copyright (c) 2016 Aaron Watry
> + *
> + * Permission is hereby granted, free of charge, to any person
> obtaining a copy
> + * of this software and associated documentation files (the
> "Software"), to deal
> + * in the Software without restriction, including without limitation
> the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense,
> and/or sell
> + * copies of the Software, and to permit persons to whom the
> Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be
> included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
> SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS IN
> + * THE SOFTWARE.
> + */
> +#if __CLC_FPSIZE == 32
> +#ifdef __CLC_SCALAR
> +// Scalar float frexp (private-pointer overload): returns x rebuilt with the
> +// exponent bits 0x3f000000, so its magnitude lies in [0.5, 1), and stores
> +// the matching power-of-two exponent in *ep. If x is zero, or its exponent
> +// field is all ones (infinity or NaN), x is returned unchanged and *ep is 0.
> +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(float x, private int *ep)
> {
> +    int i = as_int(x);
> +    // ai: bit pattern of |x| (sign bit cleared)
> +    int ai = i & 0x7fffffff;
> +    // d: nonzero with an all-zero exponent field, i.e. subnormal
> +    int d = ai > 0 & ai < 0x00800000;
> +    // scale subnormal by 2^26 without multiplying
> +    // (for a subnormal ai, forcing the exponent field to 27 yields
> +    // 2^-100 * (1 + m * 2^-23); subtracting 2^-100 leaves m * 2^-123,
> +    // which is 2^26 times the subnormal value m * 2^-149)
> +    float s = as_float(ai | 0x0d800000) - 0x1.0p-100F;
> +    ai = d ? as_int(s) : ai;
> +    // unbias relative to a [0.5, 1) mantissa and undo the 2^26 scaling
> +    int e = (ai >> 23) - 126 - (d ? 26 : 0);
> +    // t: zero, or exponent field 255 (255 - 126 == 129)
> +    int t = ai == 0 | e == 129;
> +    // keep sign and mantissa bits, force the exponent field to 126 (2^-1)
> +    i = (i & 0x80000000) | 0x3f000000 | (ai & 0x007fffff);
> +    *ep = t ? 0 : e;
> +    return t ? x : as_float(i);
> +}
> +/*
> + * Vector float frexp (private-pointer overloads) for widths 2, 3, 4, 8 and
> + * 16. Same steps as the scalar float version, applied per lane: the
> + * comparison results d and t are used as bitselect() masks in place of the
> + * scalar ?: operator.
> + */
> +#define __CLC_FREXP_VEC(width) \
> +_CLC_OVERLOAD _CLC_DEF float##width frexp(float##width x, private
> int##width *ep) { \
> +    int##width i = as_int##width(x); \
> +    /* ai: bit pattern of |x| (sign bit cleared) */ \
> +    int##width ai = i & 0x7fffffff; \
> +    /* d: lanes that are nonzero with an all-zero exponent field (subnormal) */ \
> +    int##width d = ai > 0 & ai < 0x00800000; \
> +    /* scale subnormal by 2^26 without multiplying */ \
> +    float##width s = as_float##width(ai | 0x0d800000) - 0x1.0p-100F; 
> \
> +    ai = bitselect(ai, as_int##width(s), d); \
> +    /* unbias relative to a [0.5, 1) mantissa; subtract 26 only in scaled lanes */ \
> +    int##width e = (ai >> 23) - 126 - bitselect((int##width)0,
> (int##width)26, d); \
> +    /* t: lanes that are zero or have exponent field 255 (255 - 126 == 129) */ \
> +    int##width t = ai == (int##width)0 | e == (int##width)129; \
> +    /* keep sign and mantissa bits, force the exponent field to 126 (2^-1) */ \
> +    i = (i & (int##width)0x80000000) | (int##width)0x3f000000 | (ai
> & 0x007fffff); \
> +    /* lanes flagged in t store exponent 0 and return x unchanged */ \
> +    *ep = bitselect(e, (int##width)0, t); \
> +    return bitselect(as_float##width(i), x, as_float##width(t)); \
> +}
> +__CLC_FREXP_VEC(2)
> +__CLC_FREXP_VEC(3)
> +__CLC_FREXP_VEC(4)
> +__CLC_FREXP_VEC(8)
> +__CLC_FREXP_VEC(16)
> +#undef __CLC_FREXP_VEC
> +#endif
> +#endif
> +
> +#if __CLC_FPSIZE == 64
> +#ifdef __CLC_SCALAR
> +// Scalar double frexp (private-pointer overload): returns x rebuilt with the
> +// exponent bits 0x3fe0000000000000, so its magnitude lies in [0.5, 1), and
> +// stores the matching power-of-two exponent in *ep. If x is zero, or its
> +// exponent field is all ones (infinity or NaN), x is returned unchanged and
> +// *ep is 0.
> +_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, private
> __CLC_INTN *ep) {
> +    long i = as_long(x);
> +    // ai: bit pattern of |x| (sign bit cleared)
> +    long ai = i & 0x7fffffffffffffffL;
> +    // d: nonzero with an all-zero exponent field, i.e. subnormal
> +    int d = ai > 0 & ai < 0x0010000000000000L;
> +    // scale subnormal by 2^54 without multiplying
> +    // (for a subnormal ai, forcing the exponent field to 55 yields
> +    // 2^-968 * (1 + m * 2^-52); subtracting 2^-968 leaves m * 2^-1020,
> +    // which is 2^54 times the subnormal value m * 2^-1074)
> +    double s = as_double(ai | 0x0370000000000000L) - 0x1.0p-968;
> +    ai = d ? as_long(s) : ai;
> +    // unbias relative to a [0.5, 1) mantissa and undo the 2^54 scaling
> +    int e = (int)(ai >> 52) - 1022 - (d ? 54 : 0);
> +    // t: zero, or exponent field 2047 (2047 - 1022 == 1025)
> +    int t = ai == 0 | e == 1025;
> +    // keep sign and mantissa bits, force the exponent field to 1022 (2^-1)
> +    i = (i & 0x8000000000000000L) | 0x3fe0000000000000L | (ai &
> 0x000fffffffffffffL);
> +    *ep = t ? 0 : e;
> +    return t ? x : as_double(i);
> +}
> +/*
> + * Vector double frexp (private-pointer overloads) for widths 2, 3, 4, 8 and
> + * 16; lane-wise counterpart of the scalar double version. Vector
> + * comparisons yield all-ones lanes where true, so bitselect() with those
> + * masks plays the role of the scalar ?: operator. The 64-bit masks are
> + * narrowed/widened with convert_* where they meet the int exponent vector.
> + */
> +#define __CLC_FREXP_VEC(width) \
> +_CLC_OVERLOAD _CLC_DEF double##width frexp(double##width x, private int##width *ep) { \
> +    long##width i = as_long##width(x); \
> +    /* ai: bit pattern of |x| (sign bit cleared) */ \
> +    long##width ai = i & 0x7fffffffffffffffL; \
> +    /* d: lanes that are nonzero with an all-zero exponent field (subnormal) */ \
> +    long##width d = ai > 0 & ai < 0x0010000000000000L; \
> +    /* scale subnormal by 2^54 without multiplying */ \
> +    double##width s = as_double##width(ai | 0x0370000000000000L) - 0x1.0p-968; \
> +    ai = bitselect(ai, as_long##width(s), d); \
> +    /* unbias relative to a [0.5, 1) mantissa; subtract 54 only in scaled lanes */ \
> +    int##width e = convert_int##width(ai >> 52) - 1022 - bitselect((int##width)0, (int##width)54, convert_int##width(d)); \
> +    /* t: lanes that are zero or have exponent field 2047 (inf/NaN). */ \
> +    /* 2047 - 1022 == 1025, as in the scalar double version; the float */ \
> +    /* constant 129 would miss inf/NaN and flag finite [2^128, 2^129). */ \
> +    int##width t = convert_int##width(ai == (long##width)0) | (e == (int##width)1025); \
> +    /* keep sign and mantissa bits, force the exponent field to 1022 (2^-1) */ \
> +    i = (i & (long##width)0x8000000000000000L) | (long##width)0x3fe0000000000000L | (ai & 0x000fffffffffffffL); \
> +    /* lanes flagged in t store exponent 0 and return x unchanged */ \
> +    *ep = bitselect(e, (int##width)0, t); \
> +    return bitselect(as_double##width(i), x, as_double##width(convert_long##width(t))); \
> +}
> +__CLC_FREXP_VEC(2)
> +__CLC_FREXP_VEC(3)
> +__CLC_FREXP_VEC(4)
> +__CLC_FREXP_VEC(8)
> +__CLC_FREXP_VEC(16)
> +#undef __CLC_FREXP_VEC
> +#endif
> +#endif
> +
> +/*
> + * Defines frexp for an exponent pointer in the given address space by
> + * calling frexp with the address of a private temporary and then copying
> + * the exponent out through iptr. Instantiated for local and global.
> + */
> +#define __CLC_FREXP_DEF(addrspace) \
> +  _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE frexp(__CLC_GENTYPE x, addrspace __CLC_INTN *iptr) { \
> +    __CLC_INTN exponent_tmp; \
> +    const __CLC_GENTYPE mantissa = frexp(x, &exponent_tmp); \
> +    *iptr = exponent_tmp; \
> +    return mantissa; \
> +}
> +
> +__CLC_FREXP_DEF(local);
> +__CLC_FREXP_DEF(global);
> +
> +#undef __CLC_FREXP_DEF
> \ No newline at end of file
-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20160205/914f75c0/attachment.sig>