<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Mar 16, 2015 at 8:59 AM, Tom Stellard <span dir="ltr"><<a href="mailto:tom@stellard.net" target="_blank">tom@stellard.net</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div class="HOEnZb"><div class="h5">On Fri, Mar 13, 2015 at 09:50:56AM -0500, Aaron Watry wrote:<br>

> On Thu, Mar 12, 2015 at 10:13 PM, Tom Stellard <<a href="mailto:tom@stellard.net">tom@stellard.net</a>> wrote:<br>

><br>

> > On Thu, Mar 12, 2015 at 08:48:13PM -0500, Aaron Watry wrote:<br>

> > > Signed-off-by: Aaron Watry <<a href="mailto:awatry@gmail.com">awatry@gmail.com</a>><br>

> > > ---<br>

> > >  generic/include/clc/clc.h                |   1 +<br>

> > >  generic/include/clc/math/binary_decl.inc |  18 ++++-<br>

> > >  generic/include/clc/math/gentype_tss.inc | 108<br>

> > +++++++++++++++++++++++++++++<br>

> > >  generic/include/clc/math/ldexp.h         |   9 +++<br>

> > >  generic/lib/SOURCES                      |   1 +<br>

> > >  generic/lib/clcmacro.h                   |  26 +++++++<br>

> > >  generic/lib/math/<a href="http://ldexp.cl" target="_blank">ldexp.cl</a>                | 114<br>

> > +++++++++++++++++++++++++++++++<br>

> ><br>

> > I just sent out a patch implementing an optimized ldexp for R600/SI.<br>

> > I was able to create the declarations and definitions without having to<br>

> > modify too many of the *.inc files or adding new macros to clcmacro.h<br>

> ><br>

> > I think we should try a similar approach for the generic version.<br>

> ><br>

> ><br>

> Fair enough.  We might as well piggy-back on what you did for the R600/SI<br>

> version.<br>

><br>

><br>

> > >  7 files changed, 274 insertions(+), 3 deletions(-)<br>

> > >  create mode 100644 generic/include/clc/math/gentype_tss.inc<br>

> > >  create mode 100644 generic/include/clc/math/ldexp.h<br>

> > >  create mode 100644 generic/lib/math/<a href="http://ldexp.cl" target="_blank">ldexp.cl</a><br>

> > ><br>

> > > diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h<br>

> > > index 1c12cf3..ecabcf1 100644<br>

> > > --- a/generic/include/clc/clc.h<br>

> > > +++ b/generic/include/clc/clc.h<br>

> > > @@ -51,6 +51,7 @@<br>

> > >  #include <clc/math/fmin.h><br>

> > >  #include <clc/math/fmod.h><br>

> > >  #include <clc/math/hypot.h><br>

> > > +#include <clc/math/ldexp.h><br>

> > >  #include <clc/math/log.h><br>

> > >  #include <clc/math/log10.h><br>

> > >  #include <clc/math/log1p.h><br>

> > > diff --git a/generic/include/clc/math/binary_decl.inc<br>

> > b/generic/include/clc/math/binary_decl.inc<br>

> > > index 70a7114..1805527 100644<br>

> > > --- a/generic/include/clc/math/binary_decl.inc<br>

> > > +++ b/generic/include/clc/math/binary_decl.inc<br>

> > > @@ -1,6 +1,18 @@<br>

> > > -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a,<br>

> > __CLC_GENTYPE b);<br>

> > > -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a,<br>

> > float b);<br>

> > > +#ifdef __CLC_INT_GENTYPE<br>

> > > +  #if !defined(__CLC_SCALAR)<br>

> > > +    _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE<br>

> > a, __CLC_INT_GENTYPE b);<br>

> > > +  #endif<br>

> > > +    _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE<br>

> > a, __CLC_SCALAR_INT_TYPE b);<br>

> > > +<br>

> > > +#else<br>

> > > +<br>

> > > +#if !defined(__CLC_SCALAR)<br>

> > > +  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a,<br>

> > __CLC_GENTYPE b);<br>

> > > +#endif<br>

> > > +  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a,<br>

> > float b);<br>

> > ><br>

> > >  #ifdef cl_khr_fp64<br>

> > > -_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a,<br>

> > double b);<br>

> > > +  _CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE __CLC_FUNCTION(__CLC_GENTYPE a,<br>

> > double b);<br>

> > > +#endif<br>

> > > +<br>

> > >  #endif<br>

> > > diff --git a/generic/include/clc/math/gentype_tss.inc<br>

> > b/generic/include/clc/math/gentype_tss.inc<br>

> > > new file mode 100644<br>

> > > index 0000000..11ec9ff<br>

> > > --- /dev/null<br>

> > > +++ b/generic/include/clc/math/gentype_tss.inc<br>

> > > @@ -0,0 +1,108 @@<br>

> > > +/* Used to provide support for multi-arg functions where the argument<br>

> > types and/or sizes do NOT match<br>

> > > + *<br>

> > > + *  e.g. ldexp(float16,int), ldexp(float16,int16), ldexp(double8, int)<br>

> > > + *<br>

> > > + * In general, consumers of this include will probably have versions<br>

> > with a vector first argument, and then<br>

> > > + * vector/scalar 2nd argument which may have an entirely different base<br>

> > type.<br>

> > > + */<br>

> > > +<br>

> > > +#define __CLC_SCALAR_GENTYPE float<br>

> > > +#ifndef __CLC_SCALAR_INT_TYPE<br>

> > > +    #define __CLC_UNDEF_SCALAR_INT_TYPE<br>

> > > +    #define __CLC_SCALAR_INT_TYPE int<br>

> > > +#endif<br>

> > > +#define __CLC_FPSIZE 32<br>

> > > +<br>

> > > +#define __CLC_GENTYPE float<br>

> > > +#define __CLC_INT_GENTYPE int<br>

> > > +#define __CLC_SCALAR<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +#undef __CLC_SCALAR<br>

> > > +<br>

> > > +#define __CLC_GENTYPE float2<br>

> > > +#define __CLC_INT_GENTYPE int2<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE float3<br>

> > > +#define __CLC_INT_GENTYPE int3<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE float4<br>

> > > +#define __CLC_INT_GENTYPE int4<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE float8<br>

> > > +#define __CLC_INT_GENTYPE int8<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE float16<br>

> > > +#define __CLC_INT_GENTYPE int16<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#undef __CLC_FPSIZE<br>

> > > +#undef __CLC_SCALAR_GENTYPE<br>

> > > +<br>

> > > +#ifdef cl_khr_fp64<br>

> > > +#define __CLC_SCALAR_GENTYPE double<br>

> > > +#define __CLC_FPSIZE 64<br>

> > > +<br>

> > > +#define __CLC_SCALAR<br>

> > > +#define __CLC_GENTYPE double<br>

> > > +#define __CLC_INT_GENTYPE int<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_SCALAR<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE double2<br>

> > > +#define __CLC_INT_GENTYPE int2<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE double3<br>

> > > +#define __CLC_INT_GENTYPE int3<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE double4<br>

> > > +#define __CLC_INT_GENTYPE int4<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE double8<br>

> > > +#define __CLC_INT_GENTYPE int8<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#define __CLC_GENTYPE double16<br>

> > > +#define __CLC_INT_GENTYPE int16<br>

> > > +#include __CLC_BODY<br>

> > > +#undef __CLC_GENTYPE<br>

> > > +#undef __CLC_INT_GENTYPE<br>

> > > +<br>

> > > +#undef __CLC_FPSIZE<br>

> > > +#undef __CLC_SCALAR_GENTYPE<br>

> > > +#endif<br>

> > > +<br>

> > > +#ifdef __CLC_UNDEF_SCALAR_INT_TYPE<br>

> > > +    #undef __CLC_SCALAR_INT_TYPE<br>

> > > +    #undef __CLC_UNDEF_SCALAR_INT_TYPE<br>

> > > +#endif<br>

> > > +<br>

> > > +#undef __CLC_BODY<br>

> > > diff --git a/generic/include/clc/math/ldexp.h<br>

> > b/generic/include/clc/math/ldexp.h<br>

> > > new file mode 100644<br>

> > > index 0000000..2e3b502<br>

> > > --- /dev/null<br>

> > > +++ b/generic/include/clc/math/ldexp.h<br>

> > > @@ -0,0 +1,9 @@<br>

> > > +#define __CLC_BODY <clc/math/binary_decl.inc><br>

> > > +#define __CLC_FUNCTION ldexp<br>

> > > +<br>

> > > +#include <clc/math/gentype_tss.inc><br>

> > > +<br>

> > > +#undef __CLC_BODY<br>

> > > +#undef __CLC_FUNCTION<br>

> > > +<br>

> > > +#undef __CLC_ARG2_BASE_TYPE<br>

> > > \ No newline at end of file<br>

> > > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES<br>

> > > index 0110e15..be6865e 100644<br>

> > > --- a/generic/lib/SOURCES<br>

> > > +++ b/generic/lib/SOURCES<br>

> > > @@ -70,6 +70,7 @@ math/<a href="http://fmax.cl" target="_blank">fmax.cl</a><br>

> > >  math/<a href="http://fmin.cl" target="_blank">fmin.cl</a><br>

> > >  math/<a href="http://fmod.cl" target="_blank">fmod.cl</a><br>

> > >  math/<a href="http://hypot.cl" target="_blank">hypot.cl</a><br>

> > > +math/<a href="http://ldexp.cl" target="_blank">ldexp.cl</a><br>

> > >  math/<a href="http://log10.cl" target="_blank">log10.cl</a><br>

> > >  math/<a href="http://log1p.cl" target="_blank">log1p.cl</a><br>

> > >  math/<a href="http://mad.cl" target="_blank">mad.cl</a><br>

> > > diff --git a/generic/lib/clcmacro.h b/generic/lib/clcmacro.h<br>

> > > index 346adf2..3f389e5 100644<br>

> > > --- a/generic/lib/clcmacro.h<br>

> > > +++ b/generic/lib/clcmacro.h<br>

> > > @@ -41,6 +41,28 @@<br>

> > >      return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \<br>

> > >    }<br>

> > ><br>

> > > +#define _CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(DECLSPEC, RET_TYPE,<br>

> > FUNCTION, ARG1_TYPE, ARG2_TYPE) \<br>

> > > +  DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE y) { \<br>

> > > +    return (RET_TYPE##2)(FUNCTION(x.x, y), FUNCTION(x.y, y)); \<br>

> > > +  } \<br>

> > > +\<br>

> > > +  DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE y) { \<br>

> > > +    return (RET_TYPE##3)(FUNCTION(x.x, y), FUNCTION(x.y, y), \<br>

> > > +                         FUNCTION(x.z, y)); \<br>

> > > +  } \<br>

> > > +\<br>

> > > +  DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE y) { \<br>

> > > +    return (RET_TYPE##4)(FUNCTION(x.lo, y), FUNCTION(x.hi, y)); \<br>

> > > +  } \<br>

> > > +\<br>

> > > +  DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE y) { \<br>

> > > +    return (RET_TYPE##8)(FUNCTION(x.lo, y), FUNCTION(x.hi, y)); \<br>

> > > +  } \<br>

> > > +\<br>

> > > +  DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE y) { \<br>

> > > +    return (RET_TYPE##16)(FUNCTION(x.lo, y), FUNCTION(x.hi, y)); \<br>

> > > +  }<br>

> > > +<br>

> > >  #define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE,<br>

> > ARG2_TYPE) \<br>

> > >    DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \<br>

> > >      return (RET_TYPE##2)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \<br>

> > > @@ -115,6 +137,10 @@ _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE<br>

> > x, ARG2_TYPE y) { \<br>

> > >  } \<br>

> > >  _CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, RET_TYPE, FUNCTION,<br>

> > ARG1_TYPE, ARG2_TYPE)<br>

> > ><br>

> > > +#define _CLC_DEFINE_BINARY_BUILTIN_WITH_SCALAR_SECOND_ARG(RET_TYPE,<br>

> > FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \<br>

> > > +_CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE,<br>

> > ARG2_TYPE) \<br>

> > > +_CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF,<br>

> > RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)<br>

> > > +<br>

> > >  #define _CLC_DEFINE_UNARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN,<br>

> > ARG1_TYPE) \<br>

> > >  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x) { \<br>

> > >    return BUILTIN(x); \<br>

> > > diff --git a/generic/lib/math/<a href="http://ldexp.cl" target="_blank">ldexp.cl</a> b/generic/lib/math/<a href="http://ldexp.cl" target="_blank">ldexp.cl</a><br>

> > > new file mode 100644<br>

> > > index 0000000..b7c5a92<br>

> > > --- /dev/null<br>

> > > +++ b/generic/lib/math/<a href="http://ldexp.cl" target="_blank">ldexp.cl</a><br>

> > > @@ -0,0 +1,114 @@<br>

> > > +/*<br>

> > > + * Copyright (c) 2014 Advanced Micro Devices, Inc.<br>

> > > + *<br>

> > > + * Permission is hereby granted, free of charge, to any person<br>

> > obtaining a copy<br>

> > > + * of this software and associated documentation files (the<br>

> > "Software"), to deal<br>

> > > + * in the Software without restriction, including without limitation<br>

> > the rights<br>

> > > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or<br>

> > sell<br>

> > > + * copies of the Software, and to permit persons to whom the Software is<br>

> > > + * furnished to do so, subject to the following conditions:<br>

> > > + *<br>

> > > + * The above copyright notice and this permission notice shall be<br>

> > included in<br>

> > > + * all copies or substantial portions of the Software.<br>

> > > + *<br>

> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,<br>

> > EXPRESS OR<br>

> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF<br>

> > MERCHANTABILITY,<br>

> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT<br>

> > SHALL THE<br>

> > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR<br>

> > OTHER<br>

> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,<br>

> > ARISING FROM,<br>

> > > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER<br>

> > DEALINGS IN<br>

> > > + * THE SOFTWARE.<br>

> > > + */<br>

> > > +<br>

> > > +#include <clc/clc.h><br>

> > > +#include "../clcmacro.h"<br>

> > > +#include "math.h"<br>

> > > +<br>

> > > +_CLC_DEF _CLC_OVERLOAD float ldexp(float x, int n) {<br>

> ><br>

> > I think this is where we need to integrate with my subnormal helper<br>

> > patches to add denormal flushing.  I can give this a try if you want.<br>

> ><br>

> ><br>

> Go for it.  I did notice that your denormal patch always reports false for<br>

> 16/32-bit float denormal support, so this will always fall into the same<br>

> code path regardless for float ldexp.  It's just doubles which have a call<br>

> into llvm to check if the hardware supports subnormals.<br>

><br>

<br>

</div></div>My patch just implemented the defaults required by the OpenCL spec.  Targets<br>

will be able to override these if they want.<br>

<span class=""><br>

<br>

> Bit of a question:  If the hardware supports subnormals, do we just not<br>

> need the subnormal support code?<br>

<br>

</span>We are not required by the spec to include the subnormal support code if<br>

subnormals are supported.  However, if we want to support the<br>

-cl-denorms-are-zero flag, we will need the subnormal support code.<br>

<br>

Note that the spec does not require us to do anything when this flag is passed,<br>

so supporting this flag is not required for compliance; it is just<br>

for enabling optimizations in some cases.<br>

Supporting this flag is very low priority for me.<br>

<span class=""><br>

><br>

> I noticed in quite a few of the amd builtins that subnormal support is<br>

> explicitly tested for, but in the ldexp case, the code below is straight<br>

> from the AMD built-in (doesn't have a check for if the hardware supports<br>

> subnormals or not).  I guess it's possible that the ldexp implementation<br>

> was done in such a way that it didn't matter if the hardware supports<br>

> subnormals or not, and we're missing out on possible performance by<br>

> always running the code as written when we have hardware with subnormal<br>

> support.<br>

<br>

</span>In the ldexp implementation, I think the code inside the #if 0<br>

block was handling denormals:<br>

<br>

<a href="http://llvm.org/viewvc/llvm-project/libclc/branches/amd-builtins/amd-builtins/math32/ldexpF.cl?view=co" target="_blank">http://llvm.org/viewvc/llvm-project/libclc/branches/amd-builtins/amd-builtins/math32/ldexpF.cl?view=co</a><br></blockquote><div><br></div><div>Wow, I'm blind.  I was looking at that exact file as I wrote up my previous email, and I totally glossed over the #if 0 section.  I'm guessing because my IDE highlighted it as a comment/dead-code and, well, because it was Friday morning before coffee....<br><br></div><div>--Aaron<br></div><div></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<span class="HOEnZb"><font color="#888888"><br>

-Tom<br>

</font></span><div class="HOEnZb"><div class="h5"><br>

><br>

> In summary, floating point isn't my strongest area, and without wikipedia,<br>

> wolfram, and other sites to explain some of these algorithms, I'd be<br>

> hopeless :)<br>

><br>

> --Aaron<br>

><br>

> -Tom<br>

> ><br>

> ><br>

> > > +     /* supports denormal values */<br>

> > > +     const int multiplier = 24;<br>

> > > +     float val_f;<br>

> > > +     uint val_ui;<br>

> > > +     uint sign;<br>

> > > +     int exponent;<br>

> > > +     val_ui = as_uint(x);<br>

> > > +     sign = val_ui & 0x80000000;<br>

> > > +     val_ui = val_ui & 0x7fffffff;/* remove the sign bit */<br>

> > > +     int val_x = val_ui;<br>

> > > +<br>

> > > +     exponent = val_ui >> 23; /* get the exponent */<br>

> > > +     int dexp = exponent;<br>

> > > +<br>

> > > +     /* denormal support */<br>

> > > +     int fbh = 127 - (as_uint((float)(as_float(val_ui | 0x3f800000) -<br>

> > 1.0f)) >> 23);<br>

> > > +     int dexponent = 25 - fbh;<br>

> > > +     uint dval_ui = (( (val_ui << fbh) & 0x007fffff) | (dexponent <<<br>

> > 23));<br>

> > > +     int ex = dexponent + n - multiplier;<br>

> > > +     dexponent = ex;<br>

> > > +     uint val = sign | (ex << 23) | (dval_ui & 0x007fffff);<br>

> > > +     int ex1 = dexponent + multiplier;<br>

> > > +     ex1 = -ex1 +25;<br>

> > > +     dval_ui = (((dval_ui & 0x007fffff )| 0x800000) >> ex1);<br>

> > > +     dval_ui = dexponent > 0 ? val :dval_ui;<br>

> > > +     dval_ui = dexponent > 254 ? 0x7f800000 :dval_ui;  /*overflow*/<br>

> > > +     dval_ui = dexponent < -multiplier ? 0 : dval_ui;  /*underflow*/<br>

> > > +     dval_ui = dval_ui | sign;<br>

> > > +     val_f = as_float(dval_ui);<br>

> > > +<br>

> > > +     exponent += n;<br>

> > > +<br>

> > > +     val = sign | (exponent << 23) | (val_ui & 0x007fffff);<br>

> > > +     ex1 = exponent + multiplier;<br>

> > > +     ex1 = -ex1 +25;<br>

> > > +     val_ui = (((val_ui & 0x007fffff )| 0x800000) >> ex1);<br>

> > > +     val_ui = exponent > 0 ? val :val_ui;<br>

> > > +     val_ui = exponent > 254 ? 0x7f800000 :val_ui;  /*overflow*/<br>

> > > +     val_ui = exponent < -multiplier ? 0 : val_ui;  /*underflow*/<br>

> > > +     val_ui = val_ui | sign;<br>

> > > +<br>

> > > +     val_ui = dexp == 0? dval_ui : val_ui;<br>

> > > +     val_f = as_float(val_ui);<br>

> > > +<br>

> > > +     val_f = isnan(x) | isinf(x) | val_x == 0 ? x : val_f;<br>

> > > +     return val_f;<br>

> > > +}<br>

> > > +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, ldexp, float, int)<br>

> > > +_CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, float,<br>

> > ldexp, float, int)<br>

> > > +<br>

> > > +#ifdef cl_khr_fp64<br>

> > > +<br>

> > > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable<br>

> > > +<br>

> > > +_CLC_DEF _CLC_OVERLOAD double ldexp(double x, int n) {<br>

> > > +     long l = as_ulong(x);<br>

> > > +     int e = (l >> 52) & 0x7ff;<br>

> > > +     long s = l & 0x8000000000000000;<br>

> > > +<br>

> > > +     ulong ux = as_ulong(x * 0x1.0p+53);<br>

> > > +     int de = ((int)(ux >> 52) & 0x7ff) - 53;<br>

> > > +     int c = e == 0;<br>

> > > +     e = c ? de: e;<br>

> > > +<br>

> > > +     ux = c ? ux : l;<br>

> > > +<br>

> > > +     int v = e + n;<br>

> > > +     v = clamp(v, -0x7ff, 0x7ff);<br>

> > > +<br>

> > > +     ux &= ~EXPBITS_DP64;<br>

> > > +<br>

> > > +     double mr = as_double(ux | ((ulong)(v+53) << 52));<br>

> > > +     mr = mr * 0x1.0p-53;<br>

> > > +<br>

> > > +     mr = v > 0  ? as_double(ux | ((ulong)v << 52)) : mr;<br>

> > > +<br>

> > > +     mr = v == 0x7ff ? as_double(s | PINFBITPATT_DP64)  : mr;<br>

> > > +     mr = v < -53 ? as_double(s) : mr;<br>

> > > +<br>

> > > +     mr  = ((n == 0) | isinf(x) | (x == 0) ) ? x : mr;<br>

> > > +     return mr;<br>

> > > +}<br>

> > > +<br>

> > > +_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, ldexp, double,<br>

> > int)<br>

> > > +_CLC_BINARY_VECTORIZE_SCALAR_SECOND_ARG(_CLC_OVERLOAD _CLC_DEF, double,<br>

> > ldexp, double, int)<br>

> > > +<br>

> > > +#endif<br>

> > > --<br>

> > > 2.2.0<br>

> > ><br>

> > ><br>

> > > _______________________________________________<br>

> > > Libclc-dev mailing list<br>

> > > <a href="mailto:Libclc-dev@pcc.me.uk">Libclc-dev@pcc.me.uk</a><br>

> > > <a href="http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev" target="_blank">http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev</a><br>

> ><br>

</div></div></blockquote></div><br></div></div>