[Libclc-dev] [PATCH 1/1] Provide vstore_half helper to work around clc restrictions

Tom Stellard via Libclc-dev libclc-dev at lists.llvm.org
Wed Sep 21 07:45:14 PDT 2016


On Tue, Sep 20, 2016 at 03:36:51PM -0400, Jan Vesely via Libclc-dev wrote:
> clang won't accept half-precision loads and stores without cl_khr_fp16 since r281904.
> Tested with clover on carrizo and iceland.
> 

What is the problem here?  Are half vload/vstore allowed without cl_khr_fp16?

-Tom

> Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> ---
>  generic/lib/SOURCES                       |  1 +
>  generic/lib/shared/vstore.cl              | 53 +++++++++++++++++++------------
>  generic/lib/shared/vstore_half.inc        | 12 +++----
>  generic/lib/shared/vstore_half_helpers.ll | 35 ++++++++++++++++++++
>  4 files changed, 75 insertions(+), 26 deletions(-)
>  create mode 100644 generic/lib/shared/vstore_half_helpers.ll
> 
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index 423a50b..ecd2e73 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -142,6 +142,7 @@ shared/max.cl
>  shared/min.cl
>  shared/vload.cl
>  shared/vstore.cl
> +shared/vstore_half_helpers.ll
>  workitem/get_global_id.cl
>  workitem/get_global_size.cl
>  image/get_image_dim.cl
> diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
> index ebc9446..2838384 100644
> --- a/generic/lib/shared/vstore.cl
> +++ b/generic/lib/shared/vstore.cl
> @@ -52,32 +52,45 @@ VSTORE_TYPES()
>  #endif
>  
>  /* vstore_half are legal even without cl_khr_fp16 */
> +#define DECLARE_HELPER(STYPE, AS) void __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *);
>  
> -#define VEC_STORE1(val) mem[offset++] = val;
> -#define VEC_STORE2(val) \
> -	VEC_STORE1(val.lo) \
> -	VEC_STORE1(val.hi)
> -#define VEC_STORE3(val) \
> -	VEC_STORE1(val.s0) \
> -	VEC_STORE1(val.s1) \
> -	VEC_STORE1(val.s2)
> -#define VEC_STORE4(val) \
> -	VEC_STORE2(val.lo) \
> -	VEC_STORE2(val.hi)
> -#define VEC_STORE8(val) \
> -	VEC_STORE4(val.lo) \
> -	VEC_STORE4(val.hi)
> -#define VEC_STORE16(val) \
> -	VEC_STORE8(val.lo) \
> -	VEC_STORE8(val.hi)
> +DECLARE_HELPER(float, __private);
> +DECLARE_HELPER(float, __global);
> +DECLARE_HELPER(float, __local);
>  
> -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +DECLARE_HELPER(double, __private);
> +DECLARE_HELPER(double, __global);
> +DECLARE_HELPER(double, __local);
> +#endif
> +
> +
> +#define VEC_STORE1(STYPE, AS, val) __clc_vstore_half_##STYPE##_helper##AS (val, &mem[offset++]);
> +#define VEC_STORE2(STYPE, AS, val) \
> +	VEC_STORE1(STYPE, AS, val.lo) \
> +	VEC_STORE1(STYPE, AS, val.hi)
> +#define VEC_STORE3(STYPE, AS, val) \
> +	VEC_STORE1(STYPE, AS, val.s0) \
> +	VEC_STORE1(STYPE, AS, val.s1) \
> +	VEC_STORE1(STYPE, AS, val.s2)
> +#define VEC_STORE4(STYPE, AS, val) \
> +	VEC_STORE2(STYPE, AS, val.lo) \
> +	VEC_STORE2(STYPE, AS, val.hi)
> +#define VEC_STORE8(STYPE, AS, val) \
> +	VEC_STORE4(STYPE, AS, val.lo) \
> +	VEC_STORE4(STYPE, AS, val.hi)
> +#define VEC_STORE16(STYPE, AS, val) \
> +	VEC_STORE8(STYPE, AS, val.lo) \
> +	VEC_STORE8(STYPE, AS, val.hi)
> +
> +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
>    _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
>      offset *= VEC_SIZE; \
> -    VEC_STORE##VEC_SIZE(vec) \
> +    VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
>    }
>  
> -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
> +#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS)
>  
>  #define __CLC_BODY "vstore_half.inc"
>  #include <clc/math/gentype.inc>
> diff --git a/generic/lib/shared/vstore_half.inc b/generic/lib/shared/vstore_half.inc
> index 8ed03a0..fee52bc 100644
> --- a/generic/lib/shared/vstore_half.inc
> +++ b/generic/lib/shared/vstore_half.inc
> @@ -1,10 +1,10 @@
>  
>  #ifdef __CLC_VECSIZE
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> -  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
> +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
>  #else
> -  FUNC(, 1, __CLC_GENTYPE, __private);
> -  FUNC(, 1, __CLC_GENTYPE, __local);
> -  FUNC(, 1, __CLC_GENTYPE, __global);
> +  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
> +  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
> +  FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
>  #endif
> diff --git a/generic/lib/shared/vstore_half_helpers.ll b/generic/lib/shared/vstore_half_helpers.ll
> new file mode 100644
> index 0000000..e958664
> --- /dev/null
> +++ b/generic/lib/shared/vstore_half_helpers.ll
> @@ -0,0 +1,35 @@
> +define void @__clc_vstore_half_float_helper__private(float %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
> +  %res = fptrunc float %data to half
> +  store half %res, half addrspace(0)* %ptr
> +  ret void
> +}
> +
> +define void @__clc_vstore_half_float_helper__global(float %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
> +  %res = fptrunc float %data to half
> +  store half %res, half addrspace(1)* %ptr
> +  ret void
> +}
> +
> +define void @__clc_vstore_half_float_helper__local(float %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
> +  %res = fptrunc float %data to half
> +  store half %res, half addrspace(3)* %ptr
> +  ret void
> +}
> +
> +define void @__clc_vstore_half_double_helper__private(double %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
> +  %res = fptrunc double %data to half
> +  store half %res, half addrspace(0)* %ptr
> +  ret void
> +}
> +
> +define void @__clc_vstore_half_double_helper__global(double %data, half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
> +  %res = fptrunc double %data to half
> +  store half %res, half addrspace(1)* %ptr
> +  ret void
> +}
> +
> +define void @__clc_vstore_half_double_helper__local(double %data, half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
> +  %res = fptrunc double %data to half
> +  store half %res, half addrspace(3)* %ptr
> +  ret void
> +}
> -- 
> 2.7.4
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev


More information about the Libclc-dev mailing list