[Libclc-dev] [PATCH 1/1] Implement vload_half{,n}
Aaron Watry via Libclc-dev
libclc-dev at lists.llvm.org
Sun Aug 20 19:02:54 PDT 2017
On Thu, Aug 17, 2017 at 6:31 PM, Jan Vesely via Libclc-dev
<libclc-dev at lists.llvm.org> wrote:
> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
> Fixes newly posted vload_half piglits on Turks, Carrizo, and Iceland.
>
> generic/include/clc/shared/vload.h | 45 ++++++++++++++++-------------
> generic/lib/SOURCES | 1 +
> generic/lib/shared/vload.cl | 49 ++++++++++++++++++++++++++++++++
> generic/lib/shared/vload_half.inc | 13 +++++++++
> generic/lib/shared/vload_half_helpers.ll | 23 +++++++++++++++
> 5 files changed, 111 insertions(+), 20 deletions(-)
> create mode 100644 generic/lib/shared/vload_half.inc
> create mode 100644 generic/lib/shared/vload_half_helpers.ll
>
> diff --git a/generic/include/clc/shared/vload.h b/generic/include/clc/shared/vload.h
> index 93d0750..1da4e99 100644
> --- a/generic/include/clc/shared/vload.h
> +++ b/generic/include/clc/shared/vload.h
> @@ -1,18 +1,21 @@
> -#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
> - _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
> +#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
> + _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
>
> -#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
> - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
> - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
> - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
> - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
> - _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
> +#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
> + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
> + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
> + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
> + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
> + _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
> +
> +#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
> + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
> + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
> + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> + _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
>
> #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
> - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
> - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
> - _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
> + _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
>
> #define _CLC_VECTOR_VLOAD_PRIM() \
> _CLC_VECTOR_VLOAD_PRIM1(char) \
> @@ -24,14 +27,16 @@
> _CLC_VECTOR_VLOAD_PRIM1(long) \
> _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> _CLC_VECTOR_VLOAD_PRIM1(float) \
> -
> + _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> +
> #ifdef cl_khr_fp64
> -#define _CLC_VECTOR_VLOAD() \
> - _CLC_VECTOR_VLOAD_PRIM1(double) \
> - _CLC_VECTOR_VLOAD_PRIM()
> -#else
> -#define _CLC_VECTOR_VLOAD() \
> - _CLC_VECTOR_VLOAD_PRIM()
> +#pragma OPENCL EXTENSION cl_khr_fp64: enable
> + _CLC_VECTOR_VLOAD_PRIM1(double)
> #endif
>
> -_CLC_VECTOR_VLOAD()
> +_CLC_VECTOR_VLOAD_PRIM()
> +// Plain vload_half also needs to be declared
> +_CLC_VLOAD_DECL(_half, half, float, , __constant)
> +_CLC_VLOAD_DECL(_half, half, float, , __global)
> +_CLC_VLOAD_DECL(_half, half, float, , __local)
> +_CLC_VLOAD_DECL(_half, half, float, , __private)
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 9e0157b..8b080f5 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -144,6 +144,7 @@ shared/clamp.cl
> shared/max.cl
> shared/min.cl
> shared/vload.cl
> +shared/vload_half_helpers.ll
> shared/vstore.cl
> shared/vstore_half_helpers.ll
> workitem/get_global_id.cl
> diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> index 8897200..46d6486 100644
> --- a/generic/lib/shared/vload.cl
> +++ b/generic/lib/shared/vload.cl
> @@ -50,3 +50,52 @@ VLOAD_TYPES()
> #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> VLOAD_ADDR_SPACES(double)
> #endif
> +
> +/* vload_half are legal even without cl_khr_fp16 */
> +
> +float __clc_vload_half_float_helper__constant(const __constant half *);
> +float __clc_vload_half_float_helper__global(const __global half *);
> +float __clc_vload_half_float_helper__local(const __local half *);
> +float __clc_vload_half_float_helper__private(const __private half *);
> +
> +/* no vload_half for double */
> +
> +#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
> +#define VEC_LOAD2(val, AS) \
> + VEC_LOAD1(val.lo, AS) \
> + VEC_LOAD1(val.hi, AS)
> +#define VEC_LOAD3(val, AS) \
> + VEC_LOAD1(val.s0, AS) \
> + VEC_LOAD1(val.s1, AS) \
> + VEC_LOAD1(val.s2, AS)
> +#define VEC_LOAD4(val, AS) \
> + VEC_LOAD2(val.lo, AS) \
> + VEC_LOAD2(val.hi, AS)
> +#define VEC_LOAD8(val, AS) \
> + VEC_LOAD4(val.lo, AS) \
> + VEC_LOAD4(val.hi, AS)
> +#define VEC_LOAD16(val, AS) \
> + VEC_LOAD8(val.lo, AS) \
> + VEC_LOAD8(val.hi, AS)
> +
> +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> + _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
> + offset *= VEC_SIZE; \
> + TYPE __tmp; \
> + VEC_LOAD##VEC_SIZE(__tmp, AS) \
> + return __tmp; \
> + }
> +
> +#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
> +
> +#define __CLC_BODY "vload_half.inc"
> +#include <clc/math/gentype.inc>
> +#undef __CLC_BODY
> +#undef FUNC
> +#undef __FUNC
> +#undef VEC_LOAD16
> +#undef VEC_LOAD8
> +#undef VEC_LOAD4
> +#undef VEC_LOAD3
> +#undef VEC_LOAD2
> +#undef VEC_LOAD1
> diff --git a/generic/lib/shared/vload_half.inc b/generic/lib/shared/vload_half.inc
> new file mode 100644
> index 0000000..00dae8a
> --- /dev/null
> +++ b/generic/lib/shared/vload_half.inc
> @@ -0,0 +1,13 @@
> +#if __CLC_FPSIZE == 32
> +#ifdef __CLC_VECSIZE
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> +#else
> + FUNC(, 1, __CLC_GENTYPE, __private);
> + FUNC(, 1, __CLC_GENTYPE, __local);
> + FUNC(, 1, __CLC_GENTYPE, __global);
> + FUNC(, 1, __CLC_GENTYPE, __constant);
> +#endif
> +#endif
> diff --git a/generic/lib/shared/vload_half_helpers.ll b/generic/lib/shared/vload_half_helpers.ll
> new file mode 100644
> index 0000000..b8c905a
> --- /dev/null
> +++ b/generic/lib/shared/vload_half_helpers.ll
> @@ -0,0 +1,23 @@
> +define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
> + %data = load half, half addrspace(0)* %ptr
Not sure I'm a fan of hard-coding the address space mappings here. In
the past, we've defined functions with a name of _addr[1234], and then
had target-specific mappings between the named address spaces and the
numeric ones.
--Aaron
> + %res = fpext half %data to float
> + ret float %res
> +}
> +
> +define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
> + %data = load half, half addrspace(1)* %ptr
> + %res = fpext half %data to float
> + ret float %res
> +}
> +
> +define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
> + %data = load half, half addrspace(3)* %ptr
> + %res = fpext half %data to float
> + ret float %res
> +}
> +
> +define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline {
> + %data = load half, half addrspace(2)* %ptr
> + %res = fpext half %data to float
> + ret float %res
> +}
> --
> 2.13.5
>
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
More information about the Libclc-dev
mailing list