[Libclc-dev] [PATCH 2/2] shared: Implement aligned vector stores (vstorea_half)

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Sun Oct 22 05:45:07 PDT 2017


On Sat, Oct 21, 2017, 11:36 PM Jan Vesely <jan.vesely at rutgers.edu> wrote:

> Float version passes newly posted piglit tests on turks, float and double
> pass on carrizo.
> v2: scalar vstorea_half
>
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  generic/include/clc/shared/vstore.h | 41
> +++++++++++++++++++++++++------------
>  generic/lib/shared/vstore.cl        | 30 ++++++++++++++-------------
>  generic/lib/shared/vstore_half.inc  | 21 +++++++++++++------
>  3 files changed, 59 insertions(+), 33 deletions(-)
>
> diff --git a/generic/include/clc/shared/vstore.h
> b/generic/include/clc/shared/vstore.h
> index 0e3f694..17a0d29 100644
> --- a/generic/include/clc/shared/vstore.h
> +++ b/generic/include/clc/shared/vstore.h
> @@ -16,37 +16,52 @@
>  #define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
>    _CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE) \
>
> -#define _CLC_VECTOR_VSTORE_PRIM() \
> -    _CLC_VECTOR_VSTORE_PRIM1(char) \
> -    _CLC_VECTOR_VSTORE_PRIM1(uchar) \
> -    _CLC_VECTOR_VSTORE_PRIM1(short) \
> -    _CLC_VECTOR_VSTORE_PRIM1(ushort) \
> -    _CLC_VECTOR_VSTORE_PRIM1(int) \
> -    _CLC_VECTOR_VSTORE_PRIM1(uint) \
> -    _CLC_VECTOR_VSTORE_PRIM1(long) \
> -    _CLC_VECTOR_VSTORE_PRIM1(ulong) \
> -    _CLC_VECTOR_VSTORE_PRIM1(float) \
> -    _CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
> +_CLC_VECTOR_VSTORE_PRIM1(char)
> +_CLC_VECTOR_VSTORE_PRIM1(uchar)
> +_CLC_VECTOR_VSTORE_PRIM1(short)
> +_CLC_VECTOR_VSTORE_PRIM1(ushort)
> +_CLC_VECTOR_VSTORE_PRIM1(int)
> +_CLC_VECTOR_VSTORE_PRIM1(uint)
> +_CLC_VECTOR_VSTORE_PRIM1(long)
> +_CLC_VECTOR_VSTORE_PRIM1(ulong)
> +_CLC_VECTOR_VSTORE_PRIM1(float)
> +_CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
> +// Use suffix to declare aligned vstorea_halfN
> +_CLC_VECTOR_VSTORE_PRIM3(a_half, half, float)
>
>  #ifdef cl_khr_fp64
>    _CLC_VECTOR_VSTORE_PRIM1(double)
>    _CLC_VECTOR_VSTORE_PRIM3(_half, half, double)
> +  // Use suffix to declare aligned vstorea_halfN
> +  _CLC_VECTOR_VSTORE_PRIM3(a_half, half, double)
> +
> +  // Scalar vstore_half also needs to be declared
>    _CLC_VSTORE_DECL(_half, half, double, , __private)
>    _CLC_VSTORE_DECL(_half, half, double, , __local)
>    _CLC_VSTORE_DECL(_half, half, double, , __global)
> +
> +  // Scalar vstorea_half is ont part of the specs but CTS expects it
>

Typo: "ont" should be "not" (again).

> +  _CLC_VSTORE_DECL(a_half, half, double, , __private)
> +  _CLC_VSTORE_DECL(a_half, half, double, , __local)
> +  _CLC_VSTORE_DECL(a_half, half, double, , __global)
>  #endif
>
>  #ifdef cl_khr_fp16
>    _CLC_VECTOR_VSTORE_PRIM1(half)
>  #endif
>
> -_CLC_VECTOR_VSTORE_PRIM()
> +// Scalar vstore_half also needs to be declared
>  _CLC_VSTORE_DECL(_half, half, float, , __private)
>  _CLC_VSTORE_DECL(_half, half, float, , __local)
>  _CLC_VSTORE_DECL(_half, half, float, , __global)
>
> +// Scalar vstorea_half is ont part of the specs but CTS expects it
>

Typo: "ont" should be "not" here as well.

Otherwise, this looks fine to me.

--Aaron


> +_CLC_VSTORE_DECL(a_half, half, float, , __private)
> +_CLC_VSTORE_DECL(a_half, half, float, , __local)
> +_CLC_VSTORE_DECL(a_half, half, float, , __global)
> +
> +
>  #undef _CLC_VSTORE_DECL
>  #undef _CLC_VECTOR_VSTORE_DECL
>  #undef _CLC_VECTOR_VSTORE_PRIM3
>  #undef _CLC_VECTOR_VSTORE_PRIM1
> -#undef _CLC_VECTOR_VSTORE_PRIM
> diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
> index 3343c16..e5383a8 100644
> --- a/generic/lib/shared/vstore.cl
> +++ b/generic/lib/shared/vstore.cl
> @@ -33,23 +33,22 @@
>      VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
>      VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
>
> -#define VSTORE_TYPES() \
> -    VSTORE_ADDR_SPACES(char) \
> -    VSTORE_ADDR_SPACES(uchar) \
> -    VSTORE_ADDR_SPACES(short) \
> -    VSTORE_ADDR_SPACES(ushort) \
> -    VSTORE_ADDR_SPACES(int) \
> -    VSTORE_ADDR_SPACES(uint) \
> -    VSTORE_ADDR_SPACES(long) \
> -    VSTORE_ADDR_SPACES(ulong) \
> -    VSTORE_ADDR_SPACES(float) \
> +VSTORE_ADDR_SPACES(char)
> +VSTORE_ADDR_SPACES(uchar)
> +VSTORE_ADDR_SPACES(short)
> +VSTORE_ADDR_SPACES(ushort)
> +VSTORE_ADDR_SPACES(int)
> +VSTORE_ADDR_SPACES(uint)
> +VSTORE_ADDR_SPACES(long)
> +VSTORE_ADDR_SPACES(ulong)
> +VSTORE_ADDR_SPACES(float)
>
> -VSTORE_TYPES()
>
>  #ifdef cl_khr_fp64
>  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
>      VSTORE_ADDR_SPACES(double)
>  #endif
> +
>  #ifdef cl_khr_fp16
>  #pragma OPENCL EXTENSION cl_khr_fp16 : enable
>      VSTORE_ADDR_SPACES(half)
> @@ -95,13 +94,17 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
>         VEC_STORE8(STYPE, AS, val.lo) \
>         VEC_STORE8(STYPE, AS, val.hi)
>
> -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
> +#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
>    _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t
> offset, AS half *mem) { \
>      offset *= VEC_SIZE; \
>      VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
> +  } \
> +  _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t
> offset, AS half *mem) { \
> +    offset *= OFFSET; \
> +    VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
>    }
>
> -#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE,
> TYPE, STYPE, AS)
> +#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) __FUNC(SUFFIX,
> VEC_SIZE, OFFSET, TYPE, STYPE, AS)
>
>  #define __CLC_BODY "vstore_half.inc"
>  #include <clc/math/gentype.inc>
> @@ -115,6 +118,5 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
>  #undef VEC_LOAD2
>  #undef VEC_LOAD1
>  #undef DECLARE_HELPER
> -#undef VSTORE_TYPES
>  #undef VSTORE_ADDR_SPACES
>  #undef VSTORE_VECTORIZE
> diff --git a/generic/lib/shared/vstore_half.inc
> b/generic/lib/shared/vstore_half.inc
> index fee52bc..ee4e38b 100644
> --- a/generic/lib/shared/vstore_half.inc
> +++ b/generic/lib/shared/vstore_half.inc
> @@ -1,10 +1,19 @@
>
>  #ifdef __CLC_VECSIZE
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE,
> __private);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE,
> __local);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE,
> __global);
> +
> +#if __CLC_VECSIZE == 3
> +#  define __CLC_OFFSET 4
> +#else
> +#  define __CLC_OFFSET __CLC_VECSIZE
> +#endif
> +
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __CLC_SCALAR_GENTYPE, __private);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __CLC_SCALAR_GENTYPE, __local);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE,
> __CLC_SCALAR_GENTYPE, __global);
> +
> +#undef __CLC_OFFSET
>  #else
> -  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
> -  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
> -  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
> +  FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
> +  FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
> +  FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
>  #endif
> --
> 2.13.6
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20171022/88251f65/attachment-0001.html>


More information about the Libclc-dev mailing list