[Libclc-dev] [PATCH 1/1] Implement vload_half{,n}

Aaron Watry via Libclc-dev libclc-dev at lists.llvm.org
Sun Aug 27 12:58:53 PDT 2017


_

On Mon, Aug 21, 2017 at 11:01 AM, Jan Vesely <jan.vesely at rutgers.edu> wrote:
> On Sun, 2017-08-20 at 21:10 -0500, Aaron Watry wrote:
>> Address space mappings aside, I've confirmed that this also fixes the
>> new piglits on my pitcairn (gcn 1.0).
>
> thanks for testing
>
>>
>> --Aaron
>>
>> On Sun, Aug 20, 2017 at 9:02 PM, Aaron Watry <awatry at gmail.com> wrote:
>> > On Thu, Aug 17, 2017 at 6:31 PM, Jan Vesely via Libclc-dev
>> > <libclc-dev at lists.llvm.org> wrote:
>> > > Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
>> > > ---
>> > > Fixes newly posted vload_half piglits on Turks, Carrizo, and Iceland.
>> > >
>> > >  generic/include/clc/shared/vload.h       | 45 ++++++++++++++++-------------
>> > >  generic/lib/SOURCES                      |  1 +
>> > >  generic/lib/shared/vload.cl              | 49 ++++++++++++++++++++++++++++++++
>> > >  generic/lib/shared/vload_half.inc        | 13 +++++++++
>> > >  generic/lib/shared/vload_half_helpers.ll | 23 +++++++++++++++
>> > >  5 files changed, 111 insertions(+), 20 deletions(-)
>> > >  create mode 100644 generic/lib/shared/vload_half.inc
>> > >  create mode 100644 generic/lib/shared/vload_half_helpers.ll
>> > >
>> > > diff --git a/generic/include/clc/shared/vload.h b/generic/include/clc/shared/vload.h
>> > > index 93d0750..1da4e99 100644
>> > > --- a/generic/include/clc/shared/vload.h
>> > > +++ b/generic/include/clc/shared/vload.h
>> > > @@ -1,18 +1,21 @@
>> > > -#define _CLC_VLOAD_DECL(PRIM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
>> > > -  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##WIDTH(size_t offset, const ADDR_SPACE PRIM_TYPE *x);
>> > > +#define _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, VEC_TYPE, WIDTH, ADDR_SPACE) \
>> > > +  _CLC_OVERLOAD _CLC_DECL VEC_TYPE vload##SUFFIX##WIDTH(size_t offset, const ADDR_SPACE MEM_TYPE *x);
>> > >
>> > > -#define _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, ADDR_SPACE) \
>> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
>> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
>> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
>> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
>> > > -  _CLC_VLOAD_DECL(PRIM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
>> > > +#define _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, ADDR_SPACE) \
>> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##2, 2, ADDR_SPACE) \
>> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##3, 3, ADDR_SPACE) \
>> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##4, 4, ADDR_SPACE) \
>> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##8, 8, ADDR_SPACE) \
>> > > +  _CLC_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE##16, 16, ADDR_SPACE)
>> > > +
>> > > +#define _CLC_VECTOR_VLOAD_PRIM3(SUFFIX, MEM_TYPE, PRIM_TYPE) \
>> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __private) \
>> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __local) \
>> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __constant) \
>> > > +  _CLC_VECTOR_VLOAD_DECL(SUFFIX, MEM_TYPE, PRIM_TYPE, __global) \
>> > >
>> > >  #define _CLC_VECTOR_VLOAD_PRIM1(PRIM_TYPE) \
>> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __private) \
>> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __local) \
>> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __constant) \
>> > > -  _CLC_VECTOR_VLOAD_DECL(PRIM_TYPE, __global) \
>> > > +  _CLC_VECTOR_VLOAD_PRIM3(, PRIM_TYPE, PRIM_TYPE) \
>> > >
>> > >  #define _CLC_VECTOR_VLOAD_PRIM() \
>> > >      _CLC_VECTOR_VLOAD_PRIM1(char) \
>> > > @@ -24,14 +27,16 @@
>> > >      _CLC_VECTOR_VLOAD_PRIM1(long) \
>> > >      _CLC_VECTOR_VLOAD_PRIM1(ulong) \
>> > >      _CLC_VECTOR_VLOAD_PRIM1(float) \
>> > > -
>> > > +    _CLC_VECTOR_VLOAD_PRIM3(_half, half, float)
>> > > +
>> > >  #ifdef cl_khr_fp64
>> > > -#define _CLC_VECTOR_VLOAD() \
>> > > -  _CLC_VECTOR_VLOAD_PRIM1(double) \
>> > > -  _CLC_VECTOR_VLOAD_PRIM()
>> > > -#else
>> > > -#define _CLC_VECTOR_VLOAD() \
>> > > -  _CLC_VECTOR_VLOAD_PRIM()
>> > > +#pragma OPENCL EXTENSION cl_khr_fp64: enable
>> > > +  _CLC_VECTOR_VLOAD_PRIM1(double)
>> > >  #endif
>> > >
>> > > -_CLC_VECTOR_VLOAD()
>> > > +_CLC_VECTOR_VLOAD_PRIM()
>> > > +// Plain vload_half also needs to be declared
>> > > +_CLC_VLOAD_DECL(_half, half, float, , __constant)
>> > > +_CLC_VLOAD_DECL(_half, half, float, , __global)
>> > > +_CLC_VLOAD_DECL(_half, half, float, , __local)
>> > > +_CLC_VLOAD_DECL(_half, half, float, , __private)
>> > > diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
>> > > index 9e0157b..8b080f5 100644
>> > > --- a/generic/lib/SOURCES
>> > > +++ b/generic/lib/SOURCES
>> > > @@ -144,6 +144,7 @@ shared/clamp.cl
>> > >  shared/max.cl
>> > >  shared/min.cl
>> > >  shared/vload.cl
>> > > +shared/vload_half_helpers.ll
>> > >  shared/vstore.cl
>> > >  shared/vstore_half_helpers.ll
>> > >  workitem/get_global_id.cl
>> > > diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
>> > > index 8897200..46d6486 100644
>> > > --- a/generic/lib/shared/vload.cl
>> > > +++ b/generic/lib/shared/vload.cl
>> > > @@ -50,3 +50,52 @@ VLOAD_TYPES()
>> > >  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
>> > >      VLOAD_ADDR_SPACES(double)
>> > >  #endif
>> > > +
>> > > +/* vload_half are legal even without cl_khr_fp16 */
>> > > +
>> > > +float __clc_vload_half_float_helper__constant(const __constant half *);
>> > > +float __clc_vload_half_float_helper__global(const __global half *);
>> > > +float __clc_vload_half_float_helper__local(const __local half *);
>> > > +float __clc_vload_half_float_helper__private(const __private half *);
>> > > +
>> > > +/* no vload_half for double */
>> > > +
>> > > +#define VEC_LOAD1(val, AS) val = __clc_vload_half_float_helper##AS (&mem[offset++]);
>> > > +#define VEC_LOAD2(val, AS) \
>> > > +       VEC_LOAD1(val.lo, AS) \
>> > > +       VEC_LOAD1(val.hi, AS)
>> > > +#define VEC_LOAD3(val, AS) \
>> > > +       VEC_LOAD1(val.s0, AS) \
>> > > +       VEC_LOAD1(val.s1, AS) \
>> > > +       VEC_LOAD1(val.s2, AS)
>> > > +#define VEC_LOAD4(val, AS) \
>> > > +       VEC_LOAD2(val.lo, AS) \
>> > > +       VEC_LOAD2(val.hi, AS)
>> > > +#define VEC_LOAD8(val, AS) \
>> > > +       VEC_LOAD4(val.lo, AS) \
>> > > +       VEC_LOAD4(val.hi, AS)
>> > > +#define VEC_LOAD16(val, AS) \
>> > > +       VEC_LOAD8(val.lo, AS) \
>> > > +       VEC_LOAD8(val.hi, AS)
>> > > +
>> > > +#define __FUNC(SUFFIX, VEC_SIZE, TYPE, AS) \
>> > > +  _CLC_OVERLOAD _CLC_DEF TYPE vload_half##SUFFIX(size_t offset, const AS half *mem) { \
>> > > +    offset *= VEC_SIZE; \
>> > > +    TYPE __tmp; \
>> > > +    VEC_LOAD##VEC_SIZE(__tmp, AS) \
>> > > +    return __tmp; \
>> > > +  }
>> > > +
>> > > +#define FUNC(SUFFIX, VEC_SIZE, TYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, AS)
>> > > +
>> > > +#define __CLC_BODY "vload_half.inc"
>> > > +#include <clc/math/gentype.inc>
>> > > +#undef __CLC_BODY
>> > > +#undef FUNC
>> > > +#undef __FUNC
>> > > +#undef VEC_LOAD16
>> > > +#undef VEC_LOAD8
>> > > +#undef VEC_LOAD4
>> > > +#undef VEC_LOAD3
>> > > +#undef VEC_LOAD2
>> > > +#undef VEC_LOAD1
>> > > diff --git a/generic/lib/shared/vload_half.inc b/generic/lib/shared/vload_half.inc
>> > > new file mode 100644
>> > > index 0000000..00dae8a
>> > > --- /dev/null
>> > > +++ b/generic/lib/shared/vload_half.inc
>> > > @@ -0,0 +1,13 @@
>> > > +#if __CLC_FPSIZE == 32
>> > > +#ifdef __CLC_VECSIZE
>> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __private);
>> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __local);
>> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __global);
>> > > +  FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __constant);
>> > > +#else
>> > > +  FUNC(, 1, __CLC_GENTYPE, __private);
>> > > +  FUNC(, 1, __CLC_GENTYPE, __local);
>> > > +  FUNC(, 1, __CLC_GENTYPE, __global);
>> > > +  FUNC(, 1, __CLC_GENTYPE, __constant);
>> > > +#endif
>> > > +#endif
>> > > diff --git a/generic/lib/shared/vload_half_helpers.ll b/generic/lib/shared/vload_half_helpers.ll
>> > > new file mode 100644
>> > > index 0000000..b8c905a
>> > > --- /dev/null
>> > > +++ b/generic/lib/shared/vload_half_helpers.ll
>> > > @@ -0,0 +1,23 @@
>> > > +define float @__clc_vload_half_float_helper__private(half addrspace(0)* nocapture %ptr) nounwind alwaysinline {
>> > > +  %data = load half, half addrspace(0)* %ptr
>> >
>> > Not sure I'm a fan of hard-coding the address space mappings here. In
>> > the past, we've defined functions with a name of _addr[1234], and then
>> > had target-specific mappings between the named address spaces and the
>> > numeric ones.
>
> I'm not a fan either. for now this patch follows vstore_half for
> consistency. I thinks there is a bigger problem with using LLVM AS in
> libclc. I've been considering 2 ways to address it:
>
> a) rewrite everything in CLC. This will require adding clang builtins
> for missing functionality.
>
> b) pass .ll files through clang preprocessor. This would allow .ll
> includes (for target data layout), and target specific defines in
> shared .ll code (like clang/llvm version dependent AS mapping).
>
> I have a slight preference for a), but it might restrict what
> clang/llvm version we support (or add workarounds like the current
> waitcnt implementation). I'd also have to see how receptive the clang
> ppl are to having builtins for everything that we need and can't be
> done with pure CLC.
>
> Do you think this should be addressed before any changes that use .ll
> files land?

For now, I'd settle for renaming __clc_vload_half_float_helper__* to
just indicate the numeric address space being loaded from (e.g.
__clc_vload_half_float_helper_addr1), and then map those to named
address spaces via the .cl/.inc files in generic/lib/shared/.

That way, if we have to change them later, there's less confusion
about calling a function named *__global for what is now the
local/whatever address space.

--Aaron

>
> Jan
>
>> >
>> > --Aaron
>> >
>> > > +  %res = fpext half %data to float
>> > > +  ret float %res
>> > > +}
>> > > +
>> > > +define float @__clc_vload_half_float_helper__global(half addrspace(1)* nocapture %ptr) nounwind alwaysinline {
>> > > +  %data = load half, half addrspace(1)* %ptr
>> > > +  %res = fpext half %data to float
>> > > +  ret float %res
>> > > +}
>> > > +
>> > > +define float @__clc_vload_half_float_helper__local(half addrspace(3)* nocapture %ptr) nounwind alwaysinline {
>> > > +  %data = load half, half addrspace(3)* %ptr
>> > > +  %res = fpext half %data to float
>> > > +  ret float %res
>> > > +}
>> > > +
>> > > +define float @__clc_vload_half_float_helper__constant(half addrspace(2)* nocapture %ptr) nounwind alwaysinline {
>> > > +  %data = load half, half addrspace(2)* %ptr
>> > > +  %res = fpext half %data to float
>> > > +  ret float %res
>> > > +}
>> > > --
>> > > 2.13.5
>> > >
>> > > _______________________________________________
>> > > Libclc-dev mailing list
>> > > Libclc-dev at lists.llvm.org
>> > > http://lists.llvm.org/cgi-bin/mailman/listinfo/libclc-dev


More information about the Libclc-dev mailing list