[Libclc-dev] [PATCH 2/3] Implement async_work_group_copy builtin
Tom Stellard
thomas.stellard at amd.com
Fri Aug 8 14:40:54 PDT 2014
This is a simple implementation which just copies data synchronously.
---
generic/include/clc/async/async_work_group_copy.h | 15 +++++++++++++++
generic/include/clc/async/async_work_group_copy.inc | 5 +++++
generic/include/clc/clc.h | 1 +
generic/lib/SOURCES | 1 +
generic/lib/async/async_work_group_copy.cl | 21 +++++++++++++++++++++
generic/lib/async/async_work_group_copy.inc | 16 ++++++++++++++++
6 files changed, 59 insertions(+)
create mode 100644 generic/include/clc/async/async_work_group_copy.h
create mode 100644 generic/include/clc/async/async_work_group_copy.inc
create mode 100644 generic/lib/async/async_work_group_copy.cl
create mode 100644 generic/lib/async/async_work_group_copy.inc
diff --git a/generic/include/clc/async/async_work_group_copy.h b/generic/include/clc/async/async_work_group_copy.h
new file mode 100644
index 0000000..39c637b
--- /dev/null
+++ b/generic/include/clc/async/async_work_group_copy.h
@@ -0,0 +1,15 @@
+#define __CLC_DST_ADDR_SPACE local
+#define __CLC_SRC_ADDR_SPACE global
+#define __CLC_BODY <clc/async/async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
+/* Same instantiation again with the address spaces swapped: dst is global, src is local. */
+#define __CLC_DST_ADDR_SPACE global
+#define __CLC_SRC_ADDR_SPACE local
+#define __CLC_BODY <clc/async/async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
diff --git a/generic/include/clc/async/async_work_group_copy.inc b/generic/include/clc/async/async_work_group_copy.inc
new file mode 100644
index 0000000..d85df6c
--- /dev/null
+++ b/generic/include/clc/async/async_work_group_copy.inc
@@ -0,0 +1,5 @@
+_CLC_OVERLOAD _CLC_DECL event_t async_work_group_copy(
+ __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
+ const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ event_t event);
diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index f499e6d..ed741b1 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -125,6 +125,7 @@
#include <clc/synchronization/barrier.h>
/* 6.11.10 Async Copy and Prefetch Functions */
+#include <clc/async/async_work_group_copy.h>
#include <clc/async/prefetch.h>
#include <clc/async/wait_group_events.h>
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index 3e847fe..e7dbca5 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -1,3 +1,4 @@
+async/async_work_group_copy.cl
async/prefetch.cl
async/wait_group_events.cl
atomic/atomic_impl.ll
diff --git a/generic/lib/async/async_work_group_copy.cl b/generic/lib/async/async_work_group_copy.cl
new file mode 100644
index 0000000..31c71d6
--- /dev/null
+++ b/generic/lib/async/async_work_group_copy.cl
@@ -0,0 +1,21 @@
+#include <clc/clc.h>
+
+#ifdef cl_khr_fp64
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+#endif
+
+#define __CLC_DST_ADDR_SPACE local
+#define __CLC_SRC_ADDR_SPACE global
+#define __CLC_BODY <async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
+/* Same instantiation again with the address spaces swapped: dst is global, src is local. */
+#define __CLC_DST_ADDR_SPACE global
+#define __CLC_SRC_ADDR_SPACE local
+#define __CLC_BODY <async_work_group_copy.inc>
+#include <clc/async/gentype.inc>
+#undef __CLC_DST_ADDR_SPACE
+#undef __CLC_SRC_ADDR_SPACE
+#undef __CLC_BODY
diff --git a/generic/lib/async/async_work_group_copy.inc b/generic/lib/async/async_work_group_copy.inc
new file mode 100644
index 0000000..dd3db3f
--- /dev/null
+++ b/generic/lib/async/async_work_group_copy.inc
@@ -0,0 +1,16 @@
+_CLC_OVERLOAD _CLC_DEF event_t async_work_group_copy(
+ __CLC_DST_ADDR_SPACE __CLC_GENTYPE *dst,
+ const __CLC_SRC_ADDR_SPACE __CLC_GENTYPE *src,
+ size_t num_gentypes,
+ event_t event) {
+ // Copies num_gentypes elements from src to dst synchronously, then returns event.
+ // __builtin_memcpy doesn't work with address spaces, so we need to
+ // implement the copy using a loop.
+ // Index is size_t like num_gentypes; a narrower unsigned index could wrap before reaching it.
+ size_t i;
+ for (i = 0; i < num_gentypes; ++i) {
+ dst[i] = src[i];
+ }
+ // The copy is already finished here, so the caller's event is handed back unchanged.
+ return event;
+}
--
1.8.1.5
More information about the Libclc-dev
mailing list