[Libclc-dev] [PATCH 1/2] shared: Implement aligned vector loads (vloada_half)

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Thu Oct 12 12:03:53 PDT 2017


On Sun, 2017-10-01 at 13:01 -0400, Jan Vesely wrote:
> On Sun, 2017-10-01 at 13:16 +0000, Aaron Watry wrote:
> > I haven't forgotten about these two...  I'm just trying to figure out some
> > ambiguity in the 1.2/2.0/2.2 specs about whether a scalar version
> > (vec-size 1 with no numeric suffix) is needed. The CTS tests that
> > vloada_half is supported, while the spec's language changes a bit
> > between versions, and never gets to what I'd call a consistent state.
> 
> Yeah, it was weird that the CTS expects a scalar version. Since non-aligned
> vload_half/vstore_half expect the pointer to be 16-bit aligned, scalar
> vloada_half/vstorea_half would be identical to the non-aligned versions.
> The specs seem to always mention the aligned variants with the 'n' suffix,
> so I just considered it a CTS bug.
> 
> I don't mind going beyond the spec and adding a scalar version if you
> think it's useful beyond appeasing the CTS.

Hi,

I'm not sure what the final consensus is here. Would you prefer that I add
scalar versions of vloada_half/vstorea_half even though they are
identical to the non-aligned versions?

I could not find any support for it in the specs; only the CTS expects
it.

Jan

> 
> Jan
> 
> > 
> > --Aaron
> > 
> > On Sun, Sep 24, 2017, 4:03 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:
> > 
> > > Passes newly posted piglits on Turks.
> > > 
> > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > > ---
> > >  generic/include/clc/shared/vload.h | 30 +++++++++++++++---------------
> > >  generic/lib/shared/vload.cl        | 10 ++++++++--
> > >  generic/lib/shared/vload_half.inc  | 26 ++++++++++++++++++--------
> > >  3 files changed, 41 insertions(+), 25 deletions(-)
> > > 
> > > diff --git a/generic/include/clc/shared/vload.h
> > > b/generic/include/clc/shared/vload.h
> > > index 8c262dd..f6ae917 100644
> > > --- a/generic/include/clc/shared/vload.h
> > > +++ b/generic/include/clc/shared/vload.h
> > > @@ -12,22 +12,24 @@
> > >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
> > >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
> > >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> > > -  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
> > > 
> > >  #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> > > -  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
> > > +  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
> > > 
> > > -#define _CLC_VECTOR_VLOAD_PRIM() \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(char) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(uchar) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(short) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(ushort) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(int) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(uint) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(long) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> > > -    _CLC_VECTOR_VLOAD_PRIM1(float) \
> > > -    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > > +// Declare vector load prototypes
> > > +_CLC_VECTOR_VLOAD_PRIM1(char)
> > > +_CLC_VECTOR_VLOAD_PRIM1(uchar)
> > > +_CLC_VECTOR_VLOAD_PRIM1(short)
> > > +_CLC_VECTOR_VLOAD_PRIM1(ushort)
> > > +_CLC_VECTOR_VLOAD_PRIM1(int)
> > > +_CLC_VECTOR_VLOAD_PRIM1(uint)
> > > +_CLC_VECTOR_VLOAD_PRIM1(long)
> > > +_CLC_VECTOR_VLOAD_PRIM1(ulong)
> > > +_CLC_VECTOR_VLOAD_PRIM1(float)
> > > +_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > > +// Use suffix to declare aligned vloada_halfN
> > > +_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
> > > 
> > >  #ifdef cl_khr_fp64
> > >  #pragma OPENCL EXTENSION cl_khr_fp64: enable
> > > @@ -38,7 +40,6 @@
> > >    _CLC_VECTOR_VLOAD_PRIM1(half)
> > >  #endif
> > > 
> > > -_CLC_VECTOR_VLOAD_PRIM()
> > >  // Plain vload_half also needs to be declared
> > >  _CLC_VLOAD_DECL(_half, half, float, , __constant)
> > >  _CLC_VLOAD_DECL(_half, half, float, , __global)
> > > @@ -49,4 +50,3 @@ _CLC_VLOAD_DECL(_half, half, float, , __private)
> > >  #undef _CLC_VECTOR_VLOAD_DECL
> > >  #undef _CLC_VECTOR_VLOAD_PRIM3
> > >  #undef _CLC_VECTOR_VLOAD_PRIM1
> > > -#undef _CLC_VECTOR_VLOAD_PRIM
> > > diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> > > index 0892270..9c37fcf 100644
> > > --- a/generic/lib/shared/vload.cl
> > > +++ b/generic/lib/shared/vload.cl
> > > @@ -85,15 +85,21 @@ float __clc_vload_half_float_helper__private(const
> > > __private half *);
> > >         VEC_LOAD8(val.lo, AS) \
> > >         VEC_LOAD8(val.hi, AS)
> > > 
> > > -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> > > +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
> > >    _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS
> > > half *mem) { \
> > >      offset *= VEC_SIZE; \
> > >      TYPE __tmp; \
> > >      VEC_LOAD##VEC_SIZE(__tmp, AS) \
> > >      return __tmp; \
> > > +  } \
> > > +  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS
> > > half *mem) { \
> > > +    offset *= OFFSET_SIZE; \
> > > +    TYPE __tmp; \
> > > +    VEC_LOAD##VEC_SIZE(__tmp, AS) \
> > > +    return __tmp; \
> > >    }
> > > 
> > > -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE,
> > > AS)
> > > +#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX,
> > > VEC_SIZE, OFFSET_SIZE, TYPE, AS)
> > > 
> > >  #define __CLC_BODY "vload_half.inc"
> > >  #include <clc/math/gentype.inc>
> > > diff --git a/generic/lib/shared/vload_half.inc
> > > b/generic/lib/shared/vload_half.inc
> > > index 00dae8a..11b2bf7 100644
> > > --- a/generic/lib/shared/vload_half.inc
> > > +++ b/generic/lib/shared/vload_half.inc
> > > @@ -1,13 +1,23 @@
> > >  #if __CLC_FPSIZE == 32
> > > +
> > >  #ifdef __CLC_VECSIZE
> > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> > > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> > > +
> > > +#if __CLC_VECSIZE == 3
> > > +#  define __CLC_OFFSET 4
> > >  #else
> > > -  FUNC(, 1, __CLC_GENTYPE, __private);
> > > -  FUNC(, 1, __CLC_GENTYPE, __local);
> > > -  FUNC(, 1, __CLC_GENTYPE, __global);
> > > -  FUNC(, 1, __CLC_GENTYPE, __constant);
> > > +#  define __CLC_OFFSET __CLC_VECSIZE
> > > +#endif
> > > +
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > __private);
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > __local);
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > __global);
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > > __constant);
> > > +
> > > +#undef __CLC_OFFSET
> > > +#else
> > > +  FUNC(, 1, 1, __CLC_GENTYPE, __private);
> > > +  FUNC(, 1, 1, __CLC_GENTYPE, __local);
> > > +  FUNC(, 1, 1, __CLC_GENTYPE, __global);
> > > +  FUNC(, 1, 1, __CLC_GENTYPE, __constant);
> > >  #endif
> > >  #endif
> > > --
> > > 2.13.5
> > > 
> > > 
> 
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20171012/f3c52824/attachment.sig>


More information about the Libclc-dev mailing list