[Libclc-dev] [PATCH 1/2] shared: Implement aligned vector loads (vloada_half)

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Sun Oct 1 06:16:40 PDT 2017


I haven't forgotten about these two...  I'm just trying to figure out some
ambiguity in the 1.2/2.0/2.2 specs related to whether a scalar version
(vec-size 1 with no numeric suffix) is needed. The CTS tests for
vloada_half being supported, while the spec's language changes a bit
between versions, and never gets to what I'd call a consistent state.

--Aaron

On Sun, Sep 24, 2017, 4:03 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:

> Passes newly posted piglits on Turks.
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  generic/include/clc/shared/vload.h | 30 +++++++++++++++---------------
>  generic/lib/shared/vload.cl        | 10 ++++++++--
>  generic/lib/shared/vload_half.inc  | 26 ++++++++++++++++++--------
>  3 files changed, 41 insertions(+), 25 deletions(-)
>
> diff --git a/generic/include/clc/shared/vload.h
> b/generic/include/clc/shared/vload.h
> index 8c262dd..f6ae917 100644
> --- a/generic/include/clc/shared/vload.h
> +++ b/generic/include/clc/shared/vload.h
> @@ -12,22 +12,24 @@
>    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
>    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
>    _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> -  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
> +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
>
>  #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> -  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
> +  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
>
> -#define _CLC_VECTOR_VLOAD_PRIM() \
> -    _CLC_VECTOR_VLOAD_PRIM1(char) \
> -    _CLC_VECTOR_VLOAD_PRIM1(uchar) \
> -    _CLC_VECTOR_VLOAD_PRIM1(short) \
> -    _CLC_VECTOR_VLOAD_PRIM1(ushort) \
> -    _CLC_VECTOR_VLOAD_PRIM1(int) \
> -    _CLC_VECTOR_VLOAD_PRIM1(uint) \
> -    _CLC_VECTOR_VLOAD_PRIM1(long) \
> -    _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> -    _CLC_VECTOR_VLOAD_PRIM1(float) \
> -    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> +// Declare vector load prototypes
> +_CLC_VECTOR_VLOAD_PRIM1(char)
> +_CLC_VECTOR_VLOAD_PRIM1(uchar)
> +_CLC_VECTOR_VLOAD_PRIM1(short)
> +_CLC_VECTOR_VLOAD_PRIM1(ushort)
> +_CLC_VECTOR_VLOAD_PRIM1(int)
> +_CLC_VECTOR_VLOAD_PRIM1(uint)
> +_CLC_VECTOR_VLOAD_PRIM1(long)
> +_CLC_VECTOR_VLOAD_PRIM1(ulong)
> +_CLC_VECTOR_VLOAD_PRIM1(float)
> +_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> +// Use suffix to declare aligned vloada_halfN
> +_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
>
>  #ifdef cl_khr_fp64
>  #pragma OPENCL EXTENSION cl_khr_fp64: enable
> @@ -38,7 +40,6 @@
>    _CLC_VECTOR_VLOAD_PRIM1(half)
>  #endif
>
> -_CLC_VECTOR_VLOAD_PRIM()
>  // Plain vload_half also needs to be declared
>  _CLC_VLOAD_DECL(_half, half, float, , __constant)
>  _CLC_VLOAD_DECL(_half, half, float, , __global)
> @@ -49,4 +50,3 @@ _CLC_VLOAD_DECL(_half, half, float, , __private)
>  #undef _CLC_VECTOR_VLOAD_DECL
>  #undef _CLC_VECTOR_VLOAD_PRIM3
>  #undef _CLC_VECTOR_VLOAD_PRIM1
> -#undef _CLC_VECTOR_VLOAD_PRIM
> diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> index 0892270..9c37fcf 100644
> --- a/generic/lib/shared/vload.cl
> +++ b/generic/lib/shared/vload.cl
> @@ -85,15 +85,21 @@ float __clc_vload_half_float_helper__private(const
> __private half *);
>         VEC_LOAD8(val.lo, AS) \
>         VEC_LOAD8(val.hi, AS)
>
> -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
>    _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS
> half *mem) { \
>      offset *= VEC_SIZE; \
>      TYPE __tmp; \
>      VEC_LOAD##VEC_SIZE(__tmp, AS) \
>      return __tmp; \
> +  } \
> +  _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS
> half *mem) { \
> +    offset *= OFFSET_SIZE; \
> +    TYPE __tmp; \
> +    VEC_LOAD##VEC_SIZE(__tmp, AS) \
> +    return __tmp; \
>    }
>
> -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE,
> AS)
> +#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX,
> VEC_SIZE, OFFSET_SIZE, TYPE, AS)
>
>  #define __CLC_BODY "vload_half.inc"
>  #include <clc/math/gentype.inc>
> diff --git a/generic/lib/shared/vload_half.inc
> b/generic/lib/shared/vload_half.inc
> index 00dae8a..11b2bf7 100644
> --- a/generic/lib/shared/vload_half.inc
> +++ b/generic/lib/shared/vload_half.inc
> @@ -1,13 +1,23 @@
>  #if __CLC_FPSIZE == 32
> +
>  #ifdef __CLC_VECSIZE
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> +
> +#if __CLC_VECSIZE == 3
> +#  define __CLC_OFFSET 4
>  #else
> -  FUNC(, 1, __CLC_GENTYPE, __private);
> -  FUNC(, 1, __CLC_GENTYPE, __local);
> -  FUNC(, 1, __CLC_GENTYPE, __global);
> -  FUNC(, 1, __CLC_GENTYPE, __constant);
> +#  define __CLC_OFFSET __CLC_VECSIZE
> +#endif
> +
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __private);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __local);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __global);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __constant);
> +
> +#undef __CLC_OFFSET
> +#else
> +  FUNC(, 1, 1, __CLC_GENTYPE, __private);
> +  FUNC(, 1, 1, __CLC_GENTYPE, __local);
> +  FUNC(, 1, 1, __CLC_GENTYPE, __global);
> +  FUNC(, 1, 1, __CLC_GENTYPE, __constant);
>  #endif
>  #endif
> --
> 2.13.5
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20171001/080440f7/attachment.html>


More information about the Libclc-dev mailing list