[Libclc-dev] [PATCH 1/2] shared: Implement aligned vector loads (vloada_half)
Aaron Watry via Libclc-dev
libclc-dev at lists.llvm.org
Sun Oct 1 06:16:40 PDT 2017
I haven't forgotten about these two... I'm just trying to figure out some
ambiguity in the 1.2/2.0/2.2 specs related to whether a scalar version
(vec-size 1 with no numeric suffix) is needed. The CTS tests for
vloada_half being supported, while the spec's language changes a bit
between versions, and never gets to what I'd call a consistent state.
--Aaron
On Sun, Sep 24, 2017, 4:03 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:
> Passes newly posted piglits on Turks.
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> generic/include/clc/shared/vload.h | 30 +++++++++++++++---------------
> generic/lib/shared/vload.cl | 10 ++++++++--
> generic/lib/shared/vload_half.inc | 26 ++++++++++++++++++--------
> 3 files changed, 41 insertions(+), 25 deletions(-)
>
> diff --git a/generic/include/clc/shared/vload.h
> b/generic/include/clc/shared/vload.h
> index 8c262dd..f6ae917 100644
> --- a/generic/include/clc/shared/vload.h
> +++ b/generic/include/clc/shared/vload.h
> @@ -12,22 +12,24 @@
> _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
> _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
> _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> - _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
> + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global)
>
> #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> - _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
> + _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE)
>
> -#define _CLC_VECTOR_VLOAD_PRIM() \
> - _CLC_VECTOR_VLOAD_PRIM1(char) \
> - _CLC_VECTOR_VLOAD_PRIM1(uchar) \
> - _CLC_VECTOR_VLOAD_PRIM1(short) \
> - _CLC_VECTOR_VLOAD_PRIM1(ushort) \
> - _CLC_VECTOR_VLOAD_PRIM1(int) \
> - _CLC_VECTOR_VLOAD_PRIM1(uint) \
> - _CLC_VECTOR_VLOAD_PRIM1(long) \
> - _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> - _CLC_VECTOR_VLOAD_PRIM1(float) \
> - _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> +// Declare vector load prototypes
> +_CLC_VECTOR_VLOAD_PRIM1(char)
> +_CLC_VECTOR_VLOAD_PRIM1(uchar)
> +_CLC_VECTOR_VLOAD_PRIM1(short)
> +_CLC_VECTOR_VLOAD_PRIM1(ushort)
> +_CLC_VECTOR_VLOAD_PRIM1(int)
> +_CLC_VECTOR_VLOAD_PRIM1(uint)
> +_CLC_VECTOR_VLOAD_PRIM1(long)
> +_CLC_VECTOR_VLOAD_PRIM1(ulong)
> +_CLC_VECTOR_VLOAD_PRIM1(float)
> +_CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> +// Use suffix to declare aligned vloada_halfN
> +_CLC_VECTOR_VLOAD_PRIM3(a_half, half, float)
>
> #ifdef cl_khr_fp64
> #pragma OPENCL EXTENSION cl_khr_fp64: enable
> @@ -38,7 +40,6 @@
> _CLC_VECTOR_VLOAD_PRIM1(half)
> #endif
>
> -_CLC_VECTOR_VLOAD_PRIM()
> // Plain vload_half also needs to be declared
> _CLC_VLOAD_DECL(_half, half, float, , __constant)
> _CLC_VLOAD_DECL(_half, half, float, , __global)
> @@ -49,4 +50,3 @@ _CLC_VLOAD_DECL(_half, half, float, , __private)
> #undef _CLC_VECTOR_VLOAD_DECL
> #undef _CLC_VECTOR_VLOAD_PRIM3
> #undef _CLC_VECTOR_VLOAD_PRIM1
> -#undef _CLC_VECTOR_VLOAD_PRIM
> diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> index 0892270..9c37fcf 100644
> --- a/generic/lib/shared/vload.cl
> +++ b/generic/lib/shared/vload.cl
> @@ -85,15 +85,21 @@ float __clc_vload_half_float_helper__private(const
> __private half *);
> VEC_LOAD8(val.lo, AS) \
> VEC_LOAD8(val.hi, AS)
>
> -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) \
> _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS
> half *mem) { \
> offset *= VEC_SIZE; \
> TYPE __tmp; \
> VEC_LOAD##VEC_SIZE(__tmp, AS) \
> return __tmp; \
> + } \
> + _CLC_OVERLOAD _CLC_DEF TYPE vloada_half##SUFFIX(size_t offset, const AS
> half *mem) { \
> + offset *= OFFSET_SIZE; \
> + TYPE __tmp; \
> + VEC_LOAD##VEC_SIZE(__tmp, AS) \
> + return __tmp; \
> }
>
> -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE,
> AS)
> +#define FUNC(SUFFIX, VEC_SIZE, OFFSET_SIZE, TYPE, AS) __FUNC(SUFFIX,
> VEC_SIZE, OFFSET_SIZE, TYPE, AS)
>
> #define __CLC_BODY "vload_half.inc"
> #include <clc/math/gentype.inc>
> diff --git a/generic/lib/shared/vload_half.inc
> b/generic/lib/shared/vload_half.inc
> index 00dae8a..11b2bf7 100644
> --- a/generic/lib/shared/vload_half.inc
> +++ b/generic/lib/shared/vload_half.inc
> @@ -1,13 +1,23 @@
> #if __CLC_FPSIZE == 32
> +
> #ifdef __CLC_VECSIZE
> - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> +
> +#if __CLC_VECSIZE == 3
> +# define __CLC_OFFSET 4
> #else
> - FUNC(, 1, __CLC_GENTYPE, __private);
> - FUNC(, 1, __CLC_GENTYPE, __local);
> - FUNC(, 1, __CLC_GENTYPE, __global);
> - FUNC(, 1, __CLC_GENTYPE, __constant);
> +# define __CLC_OFFSET __CLC_VECSIZE
> +#endif
> +
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __private);
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __local);
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __global);
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __constant);
> +
> +#undef __CLC_OFFSET
> +#else
> + FUNC(, 1, 1, __CLC_GENTYPE, __private);
> + FUNC(, 1, 1, __CLC_GENTYPE, __local);
> + FUNC(, 1, 1, __CLC_GENTYPE, __global);
> + FUNC(, 1, 1, __CLC_GENTYPE, __constant);
> #endif
> #endif
> --
> 2.13.5
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20171001/080440f7/attachment.html>
More information about the Libclc-dev
mailing list