[Libclc-dev] [PATCH 1/1] Implement vload_half{,n}

Jan Vesely via Libclc-dev libclc-dev at lists.llvm.org
Mon Aug 21 09:01:02 PDT 2017


On Sun, 2017-08-20 at 21:10 -0500, Aaron Watry wrote:
> Address space mappings aside, I've confirmed that this also fixes the
> new piglits on my pitcairn (gcn 1.0).

thanks for testing

> 
> --Aaron
> 
> On Sun, Aug 20, 2017 at 9:02 PM, Aaron Watry <awatry at gmail.com> wrote:
> > On Thu, Aug 17, 2017 at 6:31 PM, Jan Vesely via Libclc-dev
> > <libclc-dev at lists.llvm.org> wrote:
> > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
> > > ---
> > > Fixes newly posted vload_half piglits on Turks, Carrizo, and Iceland.
> > > 
> > >  generic/include/clc/shared/vload.h       | 45 ++++++++++++++++-------------
> > >  generic/lib/SOURCES                      |  1 +
> > >  generic/lib/shared/vload.cl              | 49 ++++++++++++++++++++++++++++++++
> > >  generic/lib/shared/vload_half.inc        | 13 +++++++++
> > >  generic/lib/shared/vload_half_helpers.ll | 23 +++++++++++++++
> > >  5 files changed, 111 insertions(+), 20 deletions(-)
> > >  create mode 100644 generic/lib/shared/vload_half.inc
> > >  create mode 100644 generic/lib/shared/vload_half_helpers.ll
> > > 
> > > diff --git a/generic/include/clc/shared/vload.h b/generic/include/clc/shared/vload.h
> > > index 93d0750..1da4e99 100644
> > > --- a/generic/include/clc/shared/vload.h
> > > +++ b/generic/include/clc/shared/vload.h
> > > @@ -1,18 +1,21 @@
> > > -#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
> > > -  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
> > > +#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
> > > +  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
> > > 
> > > -#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
> > > +#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
> > > +
> > > +#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
> > > 
> > >  #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
> > > +  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
> > > 
> > >  #define _CLC_VECTOR_VLOAD_PRIM() \
> > >      _CLC_VECTOR_VLOAD_PRIM1(char) \
> > > @@ -24,14 +27,16 @@
> > >      _CLC_VECTOR_VLOAD_PRIM1(long) \
> > >      _CLC_VECTOR_VLOAD_PRIM1(ulong) \
> > >      _CLC_VECTOR_VLOAD_PRIM1(float) \
> > > -
> > > +    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
> > > +
> > >  #ifdef cl_khr_fp64
> > > -#define _CLC_VECTOR_VLOAD() \
> > > -  _CLC_VECTOR_VLOAD_PRIM1(double) \
> > > -  _CLC_VECTOR_VLOAD_PRIM()
> > > -#else
> > > -#define _CLC_VECTOR_VLOAD() \
> > > -  _CLC_VECTOR_VLOAD_PRIM()
> > > +#pragma OPENCL EXTENSION cl_khr_fp64: enable
> > > +  _CLC_VECTOR_VLOAD_PRIM1(double)
> > >  #endif
> > > 
> > > -_CLC_VECTOR_VLOAD()
> > > +_CLC_VECTOR_VLOAD_PRIM()
> > > +// Plain vload_half also needs to be declared
> > > +_CLC_VLOAD_DECL(_half, half, float, , __constant)
> > > +_CLC_VLOAD_DECL(_half, half, float, , __global)
> > > +_CLC_VLOAD_DECL(_half, half, float, , __local)
> > > +_CLC_VLOAD_DECL(_half, half, float, , __private)
> > > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> > > index 9e0157b..8b080f5 100644
> > > --- a/generic/lib/SOURCES
> > > +++ b/generic/lib/SOURCES
> > > @@ -144,6 +144,7 @@ shared/clamp.cl
> > >  shared/max.cl
> > >  shared/min.cl
> > >  shared/vload.cl
> > > +shared/vload_half_helpers.ll
> > >  shared/vstore.cl
> > >  shared/vstore_half_helpers.ll
> > >  workitem/get_global_id.cl
> > > diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> > > index 8897200..46d6486 100644
> > > --- a/generic/lib/shared/vload.cl
> > > +++ b/generic/lib/shared/vload.cl
> > > @@ -50,3 +50,52 @@ VLOAD_TYPES()
> > >  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
> > >      VLOAD_ADDR_SPACES(double)
> > >  #endif
> > > +
> > > +/* vload_half are legal even without cl_khr_fp16 */
> > > +
> > > +float __clc_vload_half_float_helper__constant(const __constant half *);
> > > +float __clc_vload_half_float_helper__global(const __global half *);
> > > +float __clc_vload_half_float_helper__local(const __local half *);
> > > +float __clc_vload_half_float_helper__private(const __private half *);
> > > +
> > > +/* no vload_half for double */
> > > +
> > > +#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
> > > +#define VEC_LOAD2(val, AS) \
> > > +       VEC_LOAD1(val.lo, AS) \
> > > +       VEC_LOAD1(val.hi, AS)
> > > +#define VEC_LOAD3(val, AS) \
> > > +       VEC_LOAD1(val.s0, AS) \
> > > +       VEC_LOAD1(val.s1, AS) \
> > > +       VEC_LOAD1(val.s2, AS)
> > > +#define VEC_LOAD4(val, AS) \
> > > +       VEC_LOAD2(val.lo, AS) \
> > > +       VEC_LOAD2(val.hi, AS)
> > > +#define VEC_LOAD8(val, AS) \
> > > +       VEC_LOAD4(val.lo, AS) \
> > > +       VEC_LOAD4(val.hi, AS)
> > > +#define VEC_LOAD16(val, AS) \
> > > +       VEC_LOAD8(val.lo, AS) \
> > > +       VEC_LOAD8(val.hi, AS)
> > > +
> > > +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
> > > +  _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
> > > +    offset *= VEC_SIZE; \
> > > +    TYPE __tmp; \
> > > +    VEC_LOAD##VEC_SIZE(__tmp, AS) \
> > > +    return __tmp; \
> > > +  }
> > > +
> > > +#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
> > > +
> > > +#define __CLC_BODY "vload_half.inc"
> > > +#include <clc/math/gentype.inc>
> > > +#undef __CLC_BODY
> > > +#undef FUNC
> > > +#undef __FUNC
> > > +#undef VEC_LOAD16
> > > +#undef VEC_LOAD8
> > > +#undef VEC_LOAD4
> > > +#undef VEC_LOAD3
> > > +#undef VEC_LOAD2
> > > +#undef VEC_LOAD1
> > > diff --git a/generic/lib/shared/vload_half.inc b/generic/lib/shared/vload_half.inc
> > > new file mode 100644
> > > index 0000000..00dae8a
> > > --- /dev/null
> > > +++ b/generic/lib/shared/vload_half.inc
> > > @@ -0,0 +1,13 @@
> > > +#if __CLC_FPSIZE == 32
> > > +#ifdef __CLC_VECSIZE
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
> > > +#else
> > > +  FUNC(, 1, __CLC_GENTYPE, __private);
> > > +  FUNC(, 1, __CLC_GENTYPE, __local);
> > > +  FUNC(, 1, __CLC_GENTYPE, __global);
> > > +  FUNC(, 1, __CLC_GENTYPE, __constant);
> > > +#endif
> > > +#endif
> > > diff --git a/generic/lib/shared/vload_half_helpers.ll b/generic/lib/shared/vload_half_helpers.ll
> > > new file mode 100644
> > > index 0000000..b8c905a
> > > --- /dev/null
> > > +++ b/generic/lib/shared/vload_half_helpers.ll
> > > @@ -0,0 +1,23 @@
> > > +define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
> > > +  %data = load half, half addrspace(0)* %ptr
> > 
> > Not sure I'm a fan of hard-coding the address space mappings here. In
> > the past, we've defined functions with a name of _addr[1234], and then
> > had target-specific mappings between the named address spaces and the
> > numeric ones.

I'm not a fan either. for now this patch follows vstore_half for
consistency. I thinks there is a bigger problem with using LLVM AS in
libclc. I've been considering 2 ways to address it:

a) rewrite everything in CLC. This will require adding clang builtins
for missing functionality.

b) pass .ll files through clang preprocessor. This would allow .ll
includes (for target data layout), and target specific defines in
shared .ll code (like clang/llvm version dependent AS mapping).

I have a slight preference for a), but it might restrict what
clang/llvm version we support (or add workarounds like the current
waitcnt implementation). I'd also have to see how receptive the clang
ppl are to having builtins for everything that we need and can't be
done with pure CLC.

Do you think this should be addressed before any changes that use .ll
files land?

Jan

> > 
> > --Aaron
> > 
> > > +  %res = fpext half %data to float
> > > +  ret float %res
> > > +}
> > > +
> > > +define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
> > > +  %data = load half, half addrspace(1)* %ptr
> > > +  %res = fpext half %data to float
> > > +  ret float %res
> > > +}
> > > +
> > > +define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
> > > +  %data = load half, half addrspace(3)* %ptr
> > > +  %res = fpext half %data to float
> > > +  ret float %res
> > > +}
> > > +
> > > +define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline {
> > > +  %data = load half, half addrspace(2)* %ptr
> > > +  %res = fpext half %data to float
> > > +  ret float %res
> > > +}
> > > --
> > > 2.13.5
> > > 
> > > _______________________________________________
> > > Libclc-dev mailing list
> > > Libclc-dev at lists.llvm.org
> > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20170821/8c6442ec/attachment-0001.sig>


More information about the Libclc-dev mailing list