[Libclc-dev] [PATCH 1/2] shared: Implement aligned vector loads (vloada_half)

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Sun Oct 1 10:01:10 PDT 2017


On Sun, 2017-10-01 at 13:16 +0000, Aaron Watry wrote:
> I haven't forgotten about these two...  I'm just trying to figure out some
> ambiguity in both the 1.2/2.0/2.2 spec related to whether a scalar version
> (vec-size 1 with no numeric suffix) is needed. The CTS tests for
> vloada_half being supported, while the spec's language changes a bit
> between versions, and never gets to what I'd call a consistent state.

Yeah, it was weird that the CTS expects a scalar version. Since non-aligned
vload_half/vstore_half expect the pointer to be 16-bit aligned, scalar
vloada_half/vstorea_half would be identical to the non-aligned versions.
The specs seem to always mention the aligned variants with 'n' suffix,
so I just considered it a CTS bug.

I don't mind going outside the spec and adding a scalar version if you
think it's useful beyond appeasing the CTS.

Jan

> 
> --Aaron
> 
> On Sun, Sep 24, 2017, 4:03 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:
> 
> > Passes newly posted piglits on Turks.
> > 
> > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > ---
> >  generic/include/clc/shared/vload.h | 30 +++++++++++++++---------------
> >  generic/lib/shared/vload.cl        | 10 ++++++++--
> >  generic/lib/shared/vload_half.inc  | 26 ++++++++++++++++++--------
> >  3 files changed, 41 insertions(+), 25 deletions(-)
> > 
> > diff --git a/generic/include/clc/shared/vload.h
> > b/generic/include/clc/shared/vload.h
> > index 8c262dd..f6ae917 100644
> > --- a/generic/include/clc/shared/vload.h
> > +++ b/generic/include/clc/shared/vload.h
> > @@ -12,22 +12,24 @@
> >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
> >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
> >    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> > -  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
> > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
> > 
> >  #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> > -  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
> > +  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
> > 
> > -#define _CLC_VECTOR_VLOAD_PRIM() \
> > -    _CLC_VECTOR_VLOAD_PRIM1(char) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(uchar) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(short) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(ushort) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(int) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(uint) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(long) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> > -    _CLC_VECTOR_VLOAD_PRIM1(float) \
> > -    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > +// Declare vector load prototypes
> > +_CLC_VECTOR_VLOAD_PRIM1(char)
> > +_CLC_VECTOR_VLOAD_PRIM1(uchar)
> > +_CLC_VECTOR_VLOAD_PRIM1(short)
> > +_CLC_VECTOR_VLOAD_PRIM1(ushort)
> > +_CLC_VECTOR_VLOAD_PRIM1(int)
> > +_CLC_VECTOR_VLOAD_PRIM1(uint)
> > +_CLC_VECTOR_VLOAD_PRIM1(long)
> > +_CLC_VECTOR_VLOAD_PRIM1(ulong)
> > +_CLC_VECTOR_VLOAD_PRIM1(float)
> > +_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > +// Use suffix to declare aligned vloada_halfN
> > +_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
> > 
> >  #ifdef cl_khr_fp64
> >  #pragma OPENCL EXTENSION cl_khr_fp64: enable
> > @@ -38,7 +40,6 @@
> >    _CLC_VECTOR_VLOAD_PRIM1(half)
> >  #endif
> > 
> > -_CLC_VECTOR_VLOAD_PRIM()
> >  // Plain vload_half also needs to be declared
> >  _CLC_VLOAD_DECL(_half, half, float, , __constant)
> >  _CLC_VLOAD_DECL(_half, half, float, , __global)
> > @@ -49,4 +50,3 @@ _CLC_VLOAD_DECL(_half, half, float, , __private)
> >  #undef _CLC_VECTOR_VLOAD_DECL
> >  #undef _CLC_VECTOR_VLOAD_PRIM3
> >  #undef _CLC_VECTOR_VLOAD_PRIM1
> > -#undef _CLC_VECTOR_VLOAD_PRIM
> > diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> > index 0892270..9c37fcf 100644
> > --- a/generic/lib/shared/vload.cl
> > +++ b/generic/lib/shared/vload.cl
> > @@ -85,15 +85,21 @@ float __clc_vload_half_float_helper__private(const
> > __private half *);
> >         VEC_LOAD8(val.lo, AS) \
> >         VEC_LOAD8(val.hi, AS)
> > 
> > -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> > +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
> >    _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS
> > half *mem) { \
> >      offset *= VEC_SIZE; \
> >      TYPE __tmp; \
> >      VEC_LOAD##VEC_SIZE(__tmp, AS) \
> >      return __tmp; \
> > +  } \
> > +  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS
> > half *mem) { \
> > +    offset *= OFFSET_SIZE; \
> > +    TYPE __tmp; \
> > +    VEC_LOAD##VEC_SIZE(__tmp, AS) \
> > +    return __tmp; \
> >    }
> > 
> > -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE,
> > AS)
> > +#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX,
> > VEC_SIZE, OFFSET_SIZE, TYPE, AS)
> > 
> >  #define __CLC_BODY "vload_half.inc"
> >  #include <clc/math/gentype.inc>
> > diff --git a/generic/lib/shared/vload_half.inc
> > b/generic/lib/shared/vload_half.inc
> > index 00dae8a..11b2bf7 100644
> > --- a/generic/lib/shared/vload_half.inc
> > +++ b/generic/lib/shared/vload_half.inc
> > @@ -1,13 +1,23 @@
> >  #if __CLC_FPSIZE == 32
> > +
> >  #ifdef __CLC_VECSIZE
> > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> > -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> > +
> > +#if __CLC_VECSIZE == 3
> > +#  define __CLC_OFFSET 4
> >  #else
> > -  FUNC(, 1, __CLC_GENTYPE, __private);
> > -  FUNC(, 1, __CLC_GENTYPE, __local);
> > -  FUNC(, 1, __CLC_GENTYPE, __global);
> > -  FUNC(, 1, __CLC_GENTYPE, __constant);
> > +#  define __CLC_OFFSET __CLC_VECSIZE
> > +#endif
> > +
> > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > __private);
> > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > __local);
> > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > __global);
> > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> > __constant);
> > +
> > +#undef __CLC_OFFSET
> > +#else
> > +  FUNC(, 1, 1, __CLC_GENTYPE, __private);
> > +  FUNC(, 1, 1, __CLC_GENTYPE, __local);
> > +  FUNC(, 1, 1, __CLC_GENTYPE, __global);
> > +  FUNC(, 1, 1, __CLC_GENTYPE, __constant);
> >  #endif
> >  #endif
> > --
> > 2.13.5
> > 
> > 

-- 
Jan Vesely <jan.vesely at rutgers.edu>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20171001/b9dca429/attachment-0001.sig>


More information about the Libclc-dev mailing list