[Libclc-dev] [PATCH 1/1] Provide vstore_half helper to work around clc restrictions
Tom Stellard via Libclc-dev
libclc-dev at lists.llvm.org
Wed Sep 21 10:28:38 PDT 2016
On Wed, Sep 21, 2016 at 11:44:42AM -0400, Jan Vesely wrote:
> On Wed, 2016-09-21 at 14:45 +0000, Tom Stellard via Libclc-dev wrote:
> > On Tue, Sep 20, 2016 at 03:36:51PM -0400, Jan Vesely via Libclc-dev
> > wrote:
> > >
> > > > clang won't accept half precision loads and stores without
> > > > cl_khr_fp16 since r281904.
> > > > Tested on clover carrizo and iceland.
> > >
> >
> > What is the problem here? Are half vload/vstore allowed without
> > cl_khr_fp16?
>
> vload_half and vstore_half are allowed without the extension (this
> includes the scalar variants).
> vstoreN(half) and vloadN(half) need the extension.
>
OK. LGTM.
> Jan
>
> >
> > -Tom
> >
> > >
> > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > > ---
> > > generic/lib/SOURCES | 1 +
> > > generic/lib/shared/vstore.cl | 53
> > > +++++++++++++++++++------------
> > > generic/lib/shared/vstore_half.inc | 12 +++----
> > > generic/lib/shared/vstore_half_helpers.ll | 35
> > > ++++++++++++++++++++
> > > 4 files changed, 75 insertions(+), 26 deletions(-)
> > > create mode 100644 generic/lib/shared/vstore_half_helpers.ll
> > >
> > > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> > > index 423a50b..ecd2e73 100644
> > > --- a/generic/lib/SOURCES
> > > +++ b/generic/lib/SOURCES
> > > @@ -142,6 +142,7 @@ shared/max.cl
> > > shared/min.cl
> > > shared/vload.cl
> > > shared/vstore.cl
> > > +shared/vstore_half_helpers.ll
> > > workitem/get_global_id.cl
> > > workitem/get_global_size.cl
> > > image/get_image_dim.cl
> > > diff --git a/generic/lib/shared/vstore.cl
> > > b/generic/lib/shared/vstore.cl
> > > index ebc9446..2838384 100644
> > > --- a/generic/lib/shared/vstore.cl
> > > +++ b/generic/lib/shared/vstore.cl
> > > @@ -52,32 +52,45 @@ VSTORE_TYPES()
> > > #endif
> > >
> > > /* vstore_half are legal even without cl_khr_fp16 */
> > > +#define DECLARE_HELPER(STYPE, AS) void
> > > __clc_vstore_half_##STYPE##_helper##AS(STYPE, AS half *);
> > >
> > > -#define VEC_STORE1(val) mem[offset++] = val;
> > > -#define VEC_STORE2(val) \
> > > - VEC_STORE1(val.lo) \
> > > - VEC_STORE1(val.hi)
> > > -#define VEC_STORE3(val) \
> > > - VEC_STORE1(val.s0) \
> > > - VEC_STORE1(val.s1) \
> > > - VEC_STORE1(val.s2)
> > > -#define VEC_STORE4(val) \
> > > - VEC_STORE2(val.lo) \
> > > - VEC_STORE2(val.hi)
> > > -#define VEC_STORE8(val) \
> > > - VEC_STORE4(val.lo) \
> > > - VEC_STORE4(val.hi)
> > > -#define VEC_STORE16(val) \
> > > - VEC_STORE8(val.lo) \
> > > - VEC_STORE8(val.hi)
> > > +DECLARE_HELPER(float, __private);
> > > +DECLARE_HELPER(float, __global);
> > > +DECLARE_HELPER(float, __local);
> > >
> > > -#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> > > +#ifdef cl_khr_fp64
> > > +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > > +DECLARE_HELPER(double, __private);
> > > +DECLARE_HELPER(double, __global);
> > > +DECLARE_HELPER(double, __local);
> > > +#endif
> > > +
> > > +
> > > +#define VEC_STORE1(STYPE, AS, val)
> > > __clc_vstore_half_##STYPE##_helper##AS (val, &mem[offset++]);
> > > +#define VEC_STORE2(STYPE, AS, val) \
> > > + VEC_STORE1(STYPE, AS, val.lo) \
> > > + VEC_STORE1(STYPE, AS, val.hi)
> > > +#define VEC_STORE3(STYPE, AS, val) \
> > > + VEC_STORE1(STYPE, AS, val.s0) \
> > > + VEC_STORE1(STYPE, AS, val.s1) \
> > > + VEC_STORE1(STYPE, AS, val.s2)
> > > +#define VEC_STORE4(STYPE, AS, val) \
> > > + VEC_STORE2(STYPE, AS, val.lo) \
> > > + VEC_STORE2(STYPE, AS, val.hi)
> > > +#define VEC_STORE8(STYPE, AS, val) \
> > > + VEC_STORE4(STYPE, AS, val.lo) \
> > > + VEC_STORE4(STYPE, AS, val.hi)
> > > +#define VEC_STORE16(STYPE, AS, val) \
> > > + VEC_STORE8(STYPE, AS, val.lo) \
> > > + VEC_STORE8(STYPE, AS, val.hi)
> > > +
> > > +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
> > > _CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t
> > > offset, AS half *mem) { \
> > > offset *= VEC_SIZE; \
> > > - VEC_STORE##VEC_SIZE(vec) \
> > > + VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
> > > }
> > >
> > > -#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE,
> > > TYPE, AS)
> > > +#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX,
> > > VEC_SIZE, TYPE, STYPE, AS)
> > >
> > > #define __CLC_BODY "vstore_half.inc"
> > > #include <clc/math/gentype.inc>
> > > diff --git a/generic/lib/shared/vstore_half.inc
> > > b/generic/lib/shared/vstore_half.inc
> > > index 8ed03a0..fee52bc 100644
> > > --- a/generic/lib/shared/vstore_half.inc
> > > +++ b/generic/lib/shared/vstore_half.inc
> > > @@ -1,10 +1,10 @@
> > >
> > > #ifdef __CLC_VECSIZE
> > > - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> > > - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> > > - FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> > > + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE,
> > > __CLC_SCALAR_GENTYPE, __private);
> > > + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE,
> > > __CLC_SCALAR_GENTYPE, __local);
> > > + FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE,
> > > __CLC_SCALAR_GENTYPE, __global);
> > > #else
> > > - FUNC(, 1, __CLC_GENTYPE, __private);
> > > - FUNC(, 1, __CLC_GENTYPE, __local);
> > > - FUNC(, 1, __CLC_GENTYPE, __global);
> > > + FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
> > > + FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
> > > + FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
> > > #endif
> > > diff --git a/generic/lib/shared/vstore_half_helpers.ll
> > > b/generic/lib/shared/vstore_half_helpers.ll
> > > new file mode 100644
> > > index 0000000..e958664
> > > --- /dev/null
> > > +++ b/generic/lib/shared/vstore_half_helpers.ll
> > > @@ -0,0 +1,35 @@
> > > +define void @__clc_vstore_half_float_helper__private(float %data,
> > > half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
> > > + %res = fptrunc float %data to half
> > > + store half %res, half addrspace(0)* %ptr
> > > + ret void
> > > +}
> > > +
> > > +define void @__clc_vstore_half_float_helper__global(float %data,
> > > half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
> > > + %res = fptrunc float %data to half
> > > + store half %res, half addrspace(1)* %ptr
> > > + ret void
> > > +}
> > > +
> > > +define void @__clc_vstore_half_float_helper__local(float %data,
> > > half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
> > > + %res = fptrunc float %data to half
> > > + store half %res, half addrspace(3)* %ptr
> > > + ret void
> > > +}
> > > +
> > > +define void @__clc_vstore_half_double_helper__private(double
> > > %data, half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
> > > + %res = fptrunc double %data to half
> > > + store half %res, half addrspace(0)* %ptr
> > > + ret void
> > > +}
> > > +
> > > +define void @__clc_vstore_half_double_helper__global(double %data,
> > > half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
> > > + %res = fptrunc double %data to half
> > > + store half %res, half addrspace(1)* %ptr
> > > + ret void
> > > +}
> > > +
> > > +define void @__clc_vstore_half_double_helper__local(double %data,
> > > half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
> > > + %res = fptrunc double %data to half
> > > + store half %res, half addrspace(3)* %ptr
> > > + ret void
> > > +}
> > > --
> > > 2.7.4
> > >
> > > _______________________________________________
> > > Libclc-dev mailing list
> > > Libclc-dev at lists.llvm.org
> > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
> > _______________________________________________
> > Libclc-dev mailing list
> > Libclc-dev at lists.llvm.org
> > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
More information about the Libclc-dev
mailing list