[Libclc-dev] [PATCH 01/15] Fix implementation of normalize builtin
Jan Vesely
jan.vesely at rutgers.edu
Fri Apr 24 12:16:21 PDT 2015
On Fri, 2015-04-24 at 11:55 -0700, Tom Stellard wrote:
> On Fri, Apr 24, 2015 at 02:46:17PM -0400, Jan Vesely wrote:
> > On Tue, 2015-04-07 at 18:05 +0000, Tom Stellard wrote:
> > > The new implementation was ported from the AMD builtin library
> > > and has been tested with piglit, OpenCV, and the ocl conformance tests.
> >
> > LGTM.
> > in my limited testing the length was always 1 even if the elements
> > differed from those computed on CPU. (the error was < 10^-7, for values
> > in range of [10^-1, 10^2] in my tests)
> >
> >
> >
> > sorry it took so long, I'm running a bit low on time.
> > I could not test patches 6,9,10,14,15, but I scanned them for typos
> > against the amd_builtin repo, and lgtm.
> >
> > My idea of getting ldexp to work on r600 is to add a clang provided
> > define, and ifdef the implementation in r600/lib/math/ldexp.cl.
> > I'm looking at the build system right now to see if compiling both
> > generic and asic specific routines would work (it should, generic are
> > marked overloadable).
> >
>
> Another possibility would be to split the R600 lib directory into 3 directories:
> AMDGCN for SI builtins, AMDGPU for shared code, and R600 for EG+ builtins.
Wouldn't we need a separate directory for EG/NI with fp64 capabilities
(like FMAF), or should we just (mis)use cl_khr_fp64 for that?
jan
>
> -Tom
>
> > regards,
> > jan
> >
> >
> >
> > > ---
> > > generic/lib/geometric/normalize.cl | 155 +++++++++++++++++++++++++++++++++++-
> > > generic/lib/geometric/normalize.inc | 3 -
> > > 2 files changed, 152 insertions(+), 6 deletions(-)
> > > delete mode 100644 generic/lib/geometric/normalize.inc
> > >
> > > diff --git a/generic/lib/geometric/normalize.cl b/generic/lib/geometric/normalize.cl
> > > index b06b2fe..f61ac94 100644
> > > --- a/generic/lib/geometric/normalize.cl
> > > +++ b/generic/lib/geometric/normalize.cl
> > > @@ -1,8 +1,157 @@
> > > +/*
> > > + * Copyright (c) 2014 Advanced Micro Devices, Inc.
> > > + *
> > > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > > + * of this software and associated documentation files (the "Software"), to deal
> > > + * in the Software without restriction, including without limitation the rights
> > > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> > > + * copies of the Software, and to permit persons to whom the Software is
> > > + * furnished to do so, subject to the following conditions:
> > > + *
> > > + * The above copyright notice and this permission notice shall be included in
> > > + * all copies or substantial portions of the Software.
> > > + *
> > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> > > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> > > + * THE SOFTWARE.
> > > + */
> > > +
> > > #include <clc/clc.h>
> > >
> > > +_CLC_OVERLOAD _CLC_DEF float normalize(float p) {
> > > + return sign(p);
> > > +}
> > > +
> > > +_CLC_OVERLOAD _CLC_DEF float2 normalize(float2 p) {
> > > + if (all(p == (float2)0.0F))
> > > + return p;
> > > +
> > > + float l2 = dot(p, p);
> > > +
> > > + if (l2 < FLT_MIN) {
> > > + p *= 0x1.0p+86F;
> > > + l2 = dot(p, p);
> > > + } else if (l2 == INFINITY) {
> > > + p *= 0x1.0p-65f;
> > > + l2 = dot(p, p);
> > > + if (l2 == INFINITY) {
> > > + p = copysign(select((float2)0.0F, (float2)1.0F, isinf(p)), p);
> > > + l2 = dot(p, p);
> > > + }
> > > + }
> > > + return p * rsqrt(l2);
> > > +}
> > > +
> > > +_CLC_OVERLOAD _CLC_DEF float3 normalize(float3 p) {
> > > + if (all(p == (float3)0.0F))
> > > + return p;
> > > +
> > > + float l2 = dot(p, p);
> > > +
> > > + if (l2 < FLT_MIN) {
> > > + p *= 0x1.0p+86F;
> > > + l2 = dot(p, p);
> > > + } else if (l2 == INFINITY) {
> > > + p *= 0x1.0p-66f;
> > > + l2 = dot(p, p);
> > > + if (l2 == INFINITY) {
> > > + p = copysign(select((float3)0.0F, (float3)1.0F, isinf(p)), p);
> > > + l2 = dot(p, p);
> > > + }
> > > + }
> > > + return p * rsqrt(l2);
> > > +}
> > > +
> > > +_CLC_OVERLOAD _CLC_DEF float4 normalize(float4 p) {
> > > + if (all(p == (float4)0.0F))
> > > + return p;
> > > +
> > > + float l2 = dot(p, p);
> > > +
> > > + if (l2 < FLT_MIN) {
> > > + p *= 0x1.0p+86F;
> > > + l2 = dot(p, p);
> > > + } else if (l2 == INFINITY) {
> > > + p *= 0x1.0p-66f;
> > > + l2 = dot(p, p);
> > > + if (l2 == INFINITY) {
> > > + p = copysign(select((float4)0.0F, (float4)1.0F, isinf(p)), p);
> > > + l2 = dot(p, p);
> > > + }
> > > + }
> > > + return p * rsqrt(l2);
> > > +}
> > > +
> > > #ifdef cl_khr_fp64
> > > +
> > > #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > > -#endif
> > >
> > > -#define __CLC_BODY <normalize.inc>
> > > -#include <clc/geometric/floatn.inc>
> > > +_CLC_OVERLOAD _CLC_DEF double normalize(double p) {
> > > + return sign(p);
> > > +}
> > > +
> > > +_CLC_OVERLOAD _CLC_DEF double2 normalize(double2 p) {
> > > + if (all(p == (double2)0.0))
> > > + return p;
> > > +
> > > + double l2 = dot(p, p);
> > > +
> > > + if (l2 < DBL_MIN) {
> > > + p *= 0x1.0p+563;
> > > + l2 = dot(p, p);
> > > + } else if (l2 == INFINITY) {
> > > + p *= 0x1.0p-513;
> > > + l2 = dot(p, p);
> > > + if (l2 == INFINITY) {
> > > + p = copysign(select((double2)0.0, (double2)1.0, isinf(p)), p);
> > > + l2 = dot(p, p);
> > > + }
> > > + }
> > > + return p * rsqrt(l2);
> > > +}
> > > +
> > > +_CLC_OVERLOAD _CLC_DEF double3 normalize(double3 p) {
> > > + if (all(p == (double3)0.0))
> > > + return p;
> > > +
> > > + double l2 = dot(p, p);
> > > +
> > > + if (l2 < DBL_MIN) {
> > > + p *= 0x1.0p+563;
> > > + l2 = dot(p, p);
> > > + } else if (l2 == INFINITY) {
> > > + p *= 0x1.0p-514;
> > > + l2 = dot(p, p);
> > > + if (l2 == INFINITY) {
> > > + p = copysign(select((double3)0.0, (double3)1.0, isinf(p)), p);
> > > + l2 = dot(p, p);
> > > + }
> > > + }
> > > + return p * rsqrt(l2);
> > > +}
> > > +
> > > +_CLC_OVERLOAD _CLC_DEF double4 normalize(double4 p) {
> > > + if (all(p == (double4)0.0))
> > > + return p;
> > > +
> > > + double l2 = dot(p, p);
> > > +
> > > + if (l2 < DBL_MIN) {
> > > + p *= 0x1.0p+563;
> > > + l2 = dot(p, p);
> > > + } else if (l2 == INFINITY) {
> > > + p *= 0x1.0p-514;
> > > + l2 = dot(p, p);
> > > + if (l2 == INFINITY) {
> > > + p = copysign(select((double4)0.0, (double4)1.0, isinf(p)), p);
> > > + l2 = dot(p, p);
> > > + }
> > > + }
> > > + return p * rsqrt(l2);
> > > +}
> > > +
> > > +#endif
> > > diff --git a/generic/lib/geometric/normalize.inc b/generic/lib/geometric/normalize.inc
> > > deleted file mode 100644
> > > index 423ff79..0000000
> > > --- a/generic/lib/geometric/normalize.inc
> > > +++ /dev/null
> > > @@ -1,3 +0,0 @@
> > > -_CLC_OVERLOAD _CLC_DEF __CLC_FLOATN normalize(__CLC_FLOATN p) {
> > > - return p/length(p);
> > > -}
> >
> > --
> > Jan Vesely <jan.vesely at rutgers.edu>
>
>
>
> > _______________________________________________
> > Libclc-dev mailing list
> > Libclc-dev at pcc.me.uk
> > http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
>
--
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20150424/d392d5e6/attachment.sig>
More information about the Libclc-dev
mailing list