[Libclc-dev] [PATCH 2/3] Implement async_work_group_strided_copy builtin v2

Jeroen Ketema j.ketema at imperial.ac.uk
Wed Sep 24 02:54:25 PDT 2014


LGTM

Jeroen

On 24 Sep 2014, at 01:42, Tom Stellard <thomas.stellard at amd.com> wrote:

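> This is a simple implementation that performs the copy synchronously: each
> work-item copies its share of the elements before returning, and the event
> argument is returned unchanged.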
> 
> v2:
>  - Use size_t.
> ---
> .../clc/async/async_work_group_strided_copy.h      | 15 ++++++++++
> .../clc/async/async_work_group_strided_copy.inc    |  6 ++++
> generic/include/clc/clc.h                          |  1 +
> generic/lib/SOURCES                                |  1 +
> generic/lib/async/async_work_group_strided_copy.cl |  9 ++++++
> .../lib/async/async_work_group_strided_copy.inc    | 34 ++++++++++++++++++++++
> 6 files changed, 66 insertions(+)
> create mode 100644 generic/include/clc/async/async_work_group_strided_copy.h
> create mode 100644 generic/include/clc/async/async_work_group_strided_copy.inc
> create mode 100644 generic/lib/async/async_work_group_strided_copy.cl
> create mode 100644 generic/lib/async/async_work_group_strided_copy.inc
> 
> diff --git a/generic/include/clc/async/async_work_group_strided_copy.h b/generic/include/clc/async/async_work_group_strided_copy.h
> new file mode 100644
> index 0000000..bfa6f31
> --- /dev/null
> +++ b/generic/include/clc/async/async_work_group_strided_copy.h
> @@ -0,0 +1,15 @@
> +#define __CLC_DST_ADDR_SPACE local
> +#define __CLC_SRC_ADDR_SPACE global
> +#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
> +#include <clc/async/gentype.inc>
> +#undef __CLC_DST_ADDR_SPACE
> +#undef __CLC_SRC_ADDR_SPACE
> +#undef __CLC_BODY
> +
> +#define __CLC_DST_ADDR_SPACE global
> +#define __CLC_SRC_ADDR_SPACE local
> +#define __CLC_BODY <clc/async/async_work_group_strided_copy.inc>
> +#include <clc/async/gentype.inc>
> +#undef __CLC_DST_ADDR_SPACE
> +#undef __CLC_SRC_ADDR_SPACE
> +#undef __CLC_BODY
> diff --git a/generic/include/clc/async/async_work_group_strided_copy.inc b/generic/include/clc/async/async_work_group_strided_copy.inc
> new file mode 100644
> index 0000000..bdbea3a
> --- /dev/null
> +++ b/generic/include/clc/async/async_work_group_strided_copy.inc
> @@ -0,0 +1,6 @@
> +_CLC_OVERLOAD _CLC_DECL event_t async_work_group_strided_copy(
> +  __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
> +  const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
> +  size_t num_gentypes,
> +  size_t stride,
> +  event_t event);
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 0dccf53..0160e18 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -137,6 +137,7 @@
> #include <clc/synchronization/barrier.h>
> 
> /* 6.11.10 Async Copy and Prefetch Functions */
> +#include <clc/async/async_work_group_strided_copy.h>
> #include <clc/async/prefetch.h>
> #include <clc/async/wait_group_events.h>
> 
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index cefef94..beca288 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -1,3 +1,4 @@
> +async/async_work_group_strided_copy.cl
> async/prefetch.cl
> async/wait_group_events.cl
> atomic/atomic_impl.ll
> diff --git a/generic/lib/async/async_work_group_strided_copy.cl b/generic/lib/async/async_work_group_strided_copy.cl
> new file mode 100644
> index 0000000..61b8898
> --- /dev/null
> +++ b/generic/lib/async/async_work_group_strided_copy.cl
> @@ -0,0 +1,9 @@
> +#include <clc/clc.h>
> +
> +#ifdef cl_khr_fp64
> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
> +#endif
> +
> +#define __CLC_BODY <async_work_group_strided_copy.inc>
> +#include <clc/async/gentype.inc>
> +#undef __CLC_BODY
> diff --git a/generic/lib/async/async_work_group_strided_copy.inc b/generic/lib/async/async_work_group_strided_copy.inc
> new file mode 100644
> index 0000000..d81a8b7
> --- /dev/null
> +++ b/generic/lib/async/async_work_group_strided_copy.inc
> @@ -0,0 +1,34 @@
> +
> +#define STRIDED_COPY(dst, src, num_gentypes, dst_stride, src_stride)       \
> +  size_t size = get_local_size(0) * get_local_size(1) * get_local_size(2); \
> +  size_t id = (get_local_size(1) * get_local_size(2) * get_local_id(0)) +  \
> +              (get_local_size(2) * get_local_id(1)) +                      \
> +              get_local_id(2);                                             \
> +  size_t i;                                                                \
> +                                                                           \
> +  for (i = id; i < num_gentypes; i += size) {                              \
> +    dst[i * dst_stride] = src[i * src_stride];                             \
> +  }
> +
> +
> +_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
> +    local __CLC_GENTYPE *dst,
> +    const global __CLC_GENTYPE *src,
> +    size_t num_gentypes,
> +    size_t src_stride,
> +    event_t event) {
> +
> +  STRIDED_COPY(dst, src, num_gentypes, 1, src_stride);
> +  return event;
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF event_t async_work_group_strided_copy(
> +    global __CLC_GENTYPE *dst,
> +    const local __CLC_GENTYPE *src,
> +    size_t num_gentypes,
> +    size_t dst_stride,
> +    event_t event) {
> +
> +  STRIDED_COPY(dst, src, num_gentypes, dst_stride, 1);
> +  return event;
> +}
> -- 
> 1.8.5.5
> 
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev





More information about the Libclc-dev mailing list