[Libclc-dev] [PATCH 01/15] Fix implementation of normalize builtin

Jan Vesely jan.vesely at rutgers.edu
Fri Apr 24 11:46:17 PDT 2015


On Tue, 2015-04-07 at 18:05 +0000, Tom Stellard wrote:
> The new implementation was ported from the AMD builtin library
> and has been tested with piglit, OpenCV, and the ocl conformance tests.

LGTM.
In my limited testing the length was always 1, even when the elements
differed from those computed on the CPU (the error was < 10^-7 for values
in the range [10^-1, 10^2] in my tests).



Sorry it took so long; I'm running a bit low on time.
I could not test patches 6, 9, 10, 14, and 15, but I scanned them for typos
against the amd_builtin repo, and they LGTM.

My idea for getting ldexp to work on r600 is to add a clang-provided
define and #ifdef the implementation in r600/lib/math/ldexp.cl.
I'm looking at the build system right now to see whether compiling both
the generic and the ASIC-specific routines would work (it should, since
the generic ones are marked overloadable).

regards,
jan



> ---
>  generic/lib/geometric/normalize.cl  | 155 +++++++++++++++++++++++++++++++++++-
>  generic/lib/geometric/normalize.inc |   3 -
>  2 files changed, 152 insertions(+), 6 deletions(-)
>  delete mode 100644 generic/lib/geometric/normalize.inc
> 
> diff --git a/generic/lib/geometric/normalize.cl b/generic/lib/geometric/normalize.cl
> index b06b2fe..f61ac94 100644
> --- a/generic/lib/geometric/normalize.cl
> +++ b/generic/lib/geometric/normalize.cl
> @@ -1,8 +1,157 @@
> +/*
> + * Copyright (c) 2014 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
>  #include <clc/clc.h>
>  
> +_CLC_OVERLOAD _CLC_DEF float normalize(float p) {
> +  return sign(p);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
> +  if (all(p == (float2)0.0F))
> +    return p;
> +
> +  float l2 = dot(p, p);
> +
> +  if (l2 < FLT_MIN) {
> +    p *= 0x1.0p+86F;
> +    l2 = dot(p, p);
> +  } else if (l2 == INFINITY) {
> +    p *= 0x1.0p-65f;
> +    l2 = dot(p, p);
> +    if (l2 == INFINITY) {
> +      p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
> +      l2 = dot(p, p);
> +    }
> +  }
> +  return p * rsqrt(l2);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
> +  if (all(p == (float3)0.0F))
> +    return p;
> +
> +  float l2 = dot(p, p);
> +
> +  if (l2 < FLT_MIN) {
> +    p *= 0x1.0p+86F;
> +    l2 = dot(p, p);
> +  } else if (l2 == INFINITY) {
> +    p *= 0x1.0p-66f;
> +    l2 = dot(p, p);
> +    if (l2 == INFINITY) {
> +      p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
> +      l2 = dot(p, p);
> +    }
> +  }
> +  return p * rsqrt(l2);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
> +  if (all(p == (float4)0.0F))
> +    return p;
> +
> +  float l2 = dot(p, p);
> +
> +  if (l2 < FLT_MIN) {
> +    p *= 0x1.0p+86F;
> +    l2 = dot(p, p);
> +  } else if (l2 == INFINITY) {
> +    p *= 0x1.0p-66f;
> +    l2 = dot(p, p);
> +    if (l2 == INFINITY) {
> +      p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
> +      l2 = dot(p, p);
> +    }
> +  }
> +  return p * rsqrt(l2);
> +}
> +
>  #ifdef cl_khr_fp64
> +
>  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> -#endif
>  
> -#define __CLC_BODY <normalize.inc>
> -#include <clc/geometric/floatn.inc>
> +_CLC_OVERLOAD _CLC_DEF double normalize(double p) {
> +  return sign(p);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
> +  if (all(p == (double2)0.0))
> +    return p;
> +
> +  double l2 = dot(p, p);
> +
> +  if (l2 < DBL_MIN) {
> +    p *= 0x1.0p+563;
> +    l2 = dot(p, p);
> +  } else if (l2 == INFINITY) {
> +    p *= 0x1.0p-513;
> +    l2 = dot(p, p);
> +    if (l2 == INFINITY) {
> +      p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
> +      l2 = dot(p, p);
> +    }
> +  }
> +  return p * rsqrt(l2);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
> +  if (all(p == (double3)0.0))
> +    return p;
> +
> +  double l2 = dot(p, p);
> +
> +  if (l2 < DBL_MIN) {
> +    p *= 0x1.0p+563;
> +    l2 = dot(p, p);
> +  } else if (l2 == INFINITY) {
> +    p *= 0x1.0p-514;
> +    l2 = dot(p, p);
> +    if (l2 == INFINITY) {
> +      p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
> +      l2 = dot(p, p);
> +    }
> +  }
> +  return p * rsqrt(l2);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
> +  if (all(p == (double4)0.0))
> +    return p;
> +
> +  double l2 = dot(p, p);
> +
> +  if (l2 < DBL_MIN) {
> +    p *= 0x1.0p+563;
> +    l2 = dot(p, p);
> +  } else if (l2 == INFINITY) {
> +    p *= 0x1.0p-514;
> +    l2 = dot(p, p);
> +    if (l2 == INFINITY) {
> +      p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
> +      l2 = dot(p, p);
> +    }
> +  }
> +  return p * rsqrt(l2);
> +}
> +
> +#endif
> diff --git a/generic/lib/geometric/normalize.inc b/generic/lib/geometric/normalize.inc
> deleted file mode 100644
> index 423ff79..0000000
> --- a/generic/lib/geometric/normalize.inc
> +++ /dev/null
> @@ -1,3 +0,0 @@
> -_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN normalize(__CLC_FLOATN p) {
> -  return p/length(p);
> -}

-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20150424/9b5fe264/attachment.sig>


More information about the Libclc-dev mailing list