[Libclc-dev] [PATCH 01/15] Fix implementation of normalize builtin

Tom Stellard tom at stellard.net
Fri Apr 24 12:25:34 PDT 2015


On Fri, Apr 24, 2015 at 03:16:21PM -0400, Jan Vesely wrote:
> On Fri, 2015-04-24 at 11:55 -0700, Tom Stellard wrote:
> > On Fri, Apr 24, 2015 at 02:46:17PM -0400, Jan Vesely wrote:
> > > On Tue, 2015-04-07 at 18:05 +0000, Tom Stellard wrote:
> > > > The new implementation was ported from the AMD builtin library
> > > > and has been tested with piglit, OpenCV, and the ocl conformance tests.
> > > 
> > > LGTM.
> > > in my limited testing the length was always 1 even if the elements
> > > differed from those computed on CPU. (the error was < 10^-7, for values
> > > > in range of [10^-1, 10^2] in my tests)
> > > 
> > > 
> > > 
> > > sorry it took so long, I'm running a bit low on time.
> > > I could not test patches 6,9,10,14,15, but I scanned them for typos
> > > against the amd_builtin repo, and lgtm.
> > > 
> > > My idea of getting ldexp to work on r600 is to add a clang provided
> > > define, and ifdef the implementation in r600/lib/math/ldexp.cl.
> > > I'm looking at the build system right now to see if compiling both
> > > generic and asic specific routines would work (it should, generic are
> > > marked overloadable).
> > > 
> > 
> > Another possibility would be to split the R600 lib directory into 3 directories:
> > AMDGCN for SI builtins, AMDGPU for shared code, and R600 for EG+ builtins.
> 
> wouldn't we need a separate directory for EG/NI with fp64 capabilities
> (like FMAF), or should we just (mis)use cl_khr_fp64 for that ?
> 

Yes, that's what we do for the generic libs.  This was just another suggestion
in case you thought it was easier than your current approach.

-Tom

> jan
> 
> > 
> > -Tom
> > 
> > > regards,
> > > jan
> > > 
> > > 
> > > 
> > > > ---
> > > >  generic/lib/geometric/normalize.cl  | 155 +++++++++++++++++++++++++++++++++++-
> > > >  generic/lib/geometric/normalize.inc |   3 -
> > > >  2 files changed, 152 insertions(+), 6 deletions(-)
> > > >  delete mode 100644 generic/lib/geometric/normalize.inc
> > > > 
> > > > diff --git a/generic/lib/geometric/normalize.cl b/generic/lib/geometric/normalize.cl
> > > > index b06b2fe..f61ac94 100644
> > > > --- a/generic/lib/geometric/normalize.cl
> > > > +++ b/generic/lib/geometric/normalize.cl
> > > > @@ -1,8 +1,157 @@
> > > > +/*
> > > > + * Copyright (c) 2014 Advanced Micro Devices, Inc.
> > > > + *
> > > > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > > > + * of this software and associated documentation files (the "Software"), to deal
> > > > + * in the Software without restriction, including without limitation the rights
> > > > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> > > > + * copies of the Software, and to permit persons to whom the Software is
> > > > + * furnished to do so, subject to the following conditions:
> > > > + *
> > > > + * The above copyright notice and this permission notice shall be included in
> > > > + * all copies or substantial portions of the Software.
> > > > + *
> > > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> > > > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> > > > + * THE SOFTWARE.
> > > > + */
> > > > +
> > > >  #include <clc/clc.h>
> > > >  
> > > > +_CLC_OVERLOAD _CLC_DEF float normalize(float p) {
> > > > +  return sign(p);
> > > > +}
> > > > +
> > > > +_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
> > > > +  if (all(p == (float2)0.0F))
> > > > +    return p;
> > > > +
> > > > +  float l2 = dot(p, p);
> > > > +
> > > > +  if (l2 < FLT_MIN) {
> > > > +    p *= 0x1.0p+86F;
> > > > +    l2 = dot(p, p);
> > > > +  } else if (l2 == INFINITY) {
> > > > +    p *= 0x1.0p-65f;
> > > > +    l2 = dot(p, p);
> > > > +    if (l2 == INFINITY) {
> > > > +      p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
> > > > +      l2 = dot(p, p);
> > > > +    }
> > > > +  }
> > > > +  return p * rsqrt(l2);
> > > > +}
> > > > +
> > > > +_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
> > > > +  if (all(p == (float3)0.0F))
> > > > +    return p;
> > > > +
> > > > +  float l2 = dot(p, p);
> > > > +
> > > > +  if (l2 < FLT_MIN) {
> > > > +    p *= 0x1.0p+86F;
> > > > +    l2 = dot(p, p);
> > > > +  } else if (l2 == INFINITY) {
> > > > +    p *= 0x1.0p-66f;
> > > > +    l2 = dot(p, p);
> > > > +    if (l2 == INFINITY) {
> > > > +      p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
> > > > +      l2 = dot(p, p);
> > > > +    }
> > > > +  }
> > > > +  return p * rsqrt(l2);
> > > > +}
> > > > +
> > > > +_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
> > > > +  if (all(p == (float4)0.0F))
> > > > +    return p;
> > > > +
> > > > +  float l2 = dot(p, p);
> > > > +
> > > > +  if (l2 < FLT_MIN) {
> > > > +    p *= 0x1.0p+86F;
> > > > +    l2 = dot(p, p);
> > > > +  } else if (l2 == INFINITY) {
> > > > +    p *= 0x1.0p-66f;
> > > > +    l2 = dot(p, p);
> > > > +    if (l2 == INFINITY) {
> > > > +      p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
> > > > +      l2 = dot(p, p);
> > > > +    }
> > > > +  }
> > > > +  return p * rsqrt(l2);
> > > > +}
> > > > +
> > > >  #ifdef cl_khr_fp64
> > > > +
> > > >  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > > > -#endif
> > > >  
> > > > -#define __CLC_BODY <normalize.inc>
> > > > -#include <clc/geometric/floatn.inc>
> > > > +_CLC_OVERLOAD _CLC_DEF double normalize(double p) {
> > > > +  return sign(p);
> > > > +}
> > > > +
> > > > +_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
> > > > +  if (all(p == (double2)0.0))
> > > > +    return p;
> > > > +
> > > > +  double l2 = dot(p, p);
> > > > +
> > > > +  if (l2 < DBL_MIN) {
> > > > +    p *= 0x1.0p+563;
> > > > +    l2 = dot(p, p);
> > > > +  } else if (l2 == INFINITY) {
> > > > +    p *= 0x1.0p-513;
> > > > +    l2 = dot(p, p);
> > > > +    if (l2 == INFINITY) {
> > > > +      p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
> > > > +      l2 = dot(p, p);
> > > > +    }
> > > > +  }
> > > > +  return p * rsqrt(l2);
> > > > +}
> > > > +
> > > > +_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
> > > > +  if (all(p == (double3)0.0))
> > > > +    return p;
> > > > +
> > > > +  double l2 = dot(p, p);
> > > > +
> > > > +  if (l2 < DBL_MIN) {
> > > > +    p *= 0x1.0p+563;
> > > > +    l2 = dot(p, p);
> > > > +  } else if (l2 == INFINITY) {
> > > > +    p *= 0x1.0p-514;
> > > > +    l2 = dot(p, p);
> > > > +    if (l2 == INFINITY) {
> > > > +      p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
> > > > +      l2 = dot(p, p);
> > > > +    }
> > > > +  }
> > > > +  return p * rsqrt(l2);
> > > > +}
> > > > +
> > > > +_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
> > > > +  if (all(p == (double4)0.0))
> > > > +    return p;
> > > > +
> > > > +  double l2 = dot(p, p);
> > > > +
> > > > +  if (l2 < DBL_MIN) {
> > > > +    p *= 0x1.0p+563;
> > > > +    l2 = dot(p, p);
> > > > +  } else if (l2 == INFINITY) {
> > > > +    p *= 0x1.0p-514;
> > > > +    l2 = dot(p, p);
> > > > +    if (l2 == INFINITY) {
> > > > +      p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
> > > > +      l2 = dot(p, p);
> > > > +    }
> > > > +  }
> > > > +  return p * rsqrt(l2);
> > > > +}
> > > > +
> > > > +#endif
> > > > diff --git a/generic/lib/geometric/normalize.inc b/generic/lib/geometric/normalize.inc
> > > > deleted file mode 100644
> > > > index 423ff79..0000000
> > > > --- a/generic/lib/geometric/normalize.inc
> > > > +++ /dev/null
> > > > @@ -1,3 +0,0 @@
> > > > -_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN normalize(__CLC_FLOATN p) {
> > > > -  return p/length(p);
> > > > -}
> > > 
> > > -- 
> > > Jan Vesely <jan.vesely at rutgers.edu>
> > 
> > 
> > 
> > > _______________________________________________
> > > Libclc-dev mailing list
> > > Libclc-dev at pcc.me.uk
> > > http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
> > 
> 
> -- 
> Jan Vesely <jan.vesely at rutgers.edu>






More information about the Libclc-dev mailing list