[Libclc-dev] [PATCH 1/2] shared: Implement aligned vector loads (vloada_half)

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Sat Oct 14 08:10:14 PDT 2017


On Thu, Oct 12, 2017, 2:03 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:

> On Sun, 2017-10-01 at 13:01 -0400, Jan Vesely wrote:
> > On Sun, 2017-10-01 at 13:16 +0000, Aaron Watry wrote:
> > > I haven't forgotten about these two...  I'm just trying to figure out some
> > > ambiguity in the 1.2/2.0/2.2 specs related to whether a scalar version
> > > (vec-size 1 with no numeric suffix) is needed. The CTS tests for
> > > vloada_half being supported, while the spec's language changes a bit
> > > between versions, and never gets to what I'd call a consistent state.
> >
> > yeah, it was weird that the CTS expects a scalar version. Since non-aligned
> > vload_half/vstore_half expect the pointer to be 16-bit aligned, a scalar
> > vloada_half/vstorea_half would be identical to the non-aligned version.
> > The specs seem to always mention the aligned variants with 'n' suffix,
> > so I just considered it a CTS bug.
> >
> > I don't mind going out of specs and adding a scalar version if you
> > think it's useful beyond appeasing the CTS.
>
> Hi,
>
> I'm not sure what the final consensus is here. Would you prefer that I add
> scalar versions of vloada_half/vstorea_half even though they are
> identical to the non-aligned versions?
>
> I could not find any support for it in the specs, only the CTS expects
> it.
>

I'd say at this point that all conformant implementations probably already
support it, unless they've all gotten a waiver because the CTS requirement is
a possible spec bug.

Since it's basically just an alias for vload_half/vstore_half, I wouldn't
be against adding it in for now.  Maybe eventually we can get some
clarification or a test fix put in.

Aaron

>
> Jan
>
> >
> > Jan
> >
> > >
> > > --Aaron
> > >
> > > On Sun, Sep 24, 2017, 4:03 PM Jan Vesely <jan.vesely at rutgers.edu>
> wrote:
> > >
> > > > Passes newly posted piglits on Turks.
> > > >
> > > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > > > ---
> > > >  generic/include/clc/shared/vload.h | 30
> +++++++++++++++---------------
> > > >  generic/lib/shared/vload.cl        | 10 ++++++++--
> > > >  generic/lib/shared/vload_half.inc  | 26 ++++++++++++++++++--------
> > > >  3 files changed, 41 insertions(+), 25 deletions(-)
> > > >
> > > > diff --git a/generic/include/clc/shared/vload.h
> > > > b/generic/include/clc/shared/vload.h
> > > > index 8c262dd..f6ae917 100644
> > > > --- a/generic/include/clc/shared/vload.h
> > > > +++ b/generic/include/clc/shared/vload.h
> > > > @@ -12,22 +12,24 @@
> > > >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
> > > >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
> > > >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> > > > -  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
> > > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
> > > >
> > > >  #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> > > > -  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
> > > > +  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
> > > >
> > > > -#define _CLC_VECTOR_VLOAD_PRIM() \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(char) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(uchar) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(short) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(ushort) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(int) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(uint) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(long) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM1(float) \
> > > > -    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > > > +// Declare vector load prototypes
> > > > +_CLC_VECTOR_VLOAD_PRIM1(char)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(uchar)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(short)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(ushort)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(int)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(uint)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(long)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(ulong)
> > > > +_CLC_VECTOR_VLOAD_PRIM1(float)
> > > > +_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > > > +// Use suffix to declare aligned vloada_halfN
> > > > +_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
> > > >
> > > >  #ifdef cl_khr_fp64
> > > >  #pragma OPENCL EXTENSION cl_khr_fp64: enable
> > > > @@ -38,7 +40,6 @@
> > > >    _CLC_VECTOR_VLOAD_PRIM1(half)
> > > >  #endif
> > > >
> > > > -_CLC_VECTOR_VLOAD_PRIM()
> > > >  // Plain vload_half also needs to be declared
> > > >  _CLC_VLOAD_DECL(_half, half, float, , __constant)
> > > >  _CLC_VLOAD_DECL(_half, half, float, , __global)
> > > > @@ -49,4 +50,3 @@ _CLC_VLOAD_DECL(_half, half, float, , __private)
> > > >  #undef _CLC_VECTOR_VLOAD_DECL
> > > >  #undef _CLC_VECTOR_VLOAD_PRIM3
> > > >  #undef _CLC_VECTOR_VLOAD_PRIM1
> > > > -#undef _CLC_VECTOR_VLOAD_PRIM
> > > > diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/
> vload.cl
> > > > index 0892270..9c37fcf 100644
> > > > --- a/generic/lib/shared/vload.cl
> > > > +++ b/generic/lib/shared/vload.cl
> > > > @@ -85,15 +85,21 @@ float
> __clc_vload_half_float_helper__private(const
> > > > __private half *);
> > > >         VEC_LOAD8(val.lo, AS) \
> > > >         VEC_LOAD8(val.hi, AS)
> > > >
> > > > -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> > > > +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
> > > >    _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset,
> const AS
> > > > half *mem) { \
> > > >      offset *= VEC_SIZE; \
> > > >      TYPE __tmp; \
> > > >      VEC_LOAD##VEC_SIZE(__tmp, AS) \
> > > >      return __tmp; \
> > > > +  } \
> > > > +  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset,
> const AS
> > > > half *mem) { \
> > > > +    offset *= OFFSET_SIZE; \
> > > > +    TYPE __tmp; \
> > > > +    VEC_LOAD##VEC_SIZE(__tmp, AS) \
> > > > +    return __tmp; \
> > > >    }
> > > >
> > > > -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE,
> TYPE,
> > > > AS)
> > > > +#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX,
> > > > VEC_SIZE, OFFSET_SIZE, TYPE, AS)
> > > >
> > > >  #define __CLC_BODY "vload_half.inc"
> > > >  #include <clc/math/gentype.inc>
> > > > diff --git a/generic/lib/shared/vload_half.inc
> > > > b/generic/lib/shared/vload_half.inc
> > > > index 00dae8a..11b2bf7 100644
> > > > --- a/generic/lib/shared/vload_half.inc
> > > > +++ b/generic/lib/shared/vload_half.inc
> > > > @@ -1,13 +1,23 @@
> > > >  #if __CLC_FPSIZE == 32
> > > > +
> > > >  #ifdef __CLC_VECSIZE
> > > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> > > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> > > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> > > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> > > > +
> > > > +#if __CLC_VECSIZE == 3
> > > > +#  define __CLC_OFFSET 4
> > > >  #else
> > > > -  FUNC(, 1, __CLC_GENTYPE, __private);
> > > > -  FUNC(, 1, __CLC_GENTYPE, __local);
> > > > -  FUNC(, 1, __CLC_GENTYPE, __global);
> > > > -  FUNC(, 1, __CLC_GENTYPE, __constant);
> > > > +#  define __CLC_OFFSET __CLC_VECSIZE
> > > > +#endif
> > > > +
> > > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > > __private);
> > > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > > __local);
> > > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > > __global);
> > > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > > __constant);
> > > > +
> > > > +#undef __CLC_OFFSET
> > > > +#else
> > > > +  FUNC(, 1, 1, __CLC_GENTYPE, __private);
> > > > +  FUNC(, 1, 1, __CLC_GENTYPE, __local);
> > > > +  FUNC(, 1, 1, __CLC_GENTYPE, __global);
> > > > +  FUNC(, 1, 1, __CLC_GENTYPE, __constant);
> > > >  #endif
> > > >  #endif
> > > > --
> > > > 2.13.5
> > > >
> > > >
> >
> >
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20171014/87120640/attachment.html>


More information about the Libclc-dev mailing list