[Libclc-dev] [PATCH 2/2] shared: Implement aligned vector stores (vstorea_half)
Jan Vesely via Libclc-dev
libclc-dev at lists.llvm.org
Sun Sep 24 14:03:46 PDT 2017
The float version passes the newly posted piglit tests on Turks.
Signed-off-by: Jan Vesely <jan.vesely at rutgers.edu>
---
generic/include/clc/shared/vstore.h | 28 +++++++++++++++-------------
generic/lib/shared/vstore.cl | 30 ++++++++++++++++--------------
generic/lib/shared/vstore_half.inc | 21 +++++++++++++++------
3 files changed, 46 insertions(+), 33 deletions(-)
diff --git a/generic/include/clc/shared/vstore.h b/generic/include/clc/shared/vstore.h
index 0e3f694..e9c5702 100644
--- a/generic/include/clc/shared/vstore.h
+++ b/generic/include/clc/shared/vstore.h
@@ -16,21 +16,25 @@
#define _CLC_VECTOR_VSTORE_PRIM1(PRIM_TYPE) \
_CLC_VECTOR_VSTORE_PRIM3(,PRIM_TYPE, PRIM_TYPE) \
-#define _CLC_VECTOR_VSTORE_PRIM() \
- _CLC_VECTOR_VSTORE_PRIM1(char) \
- _CLC_VECTOR_VSTORE_PRIM1(uchar) \
- _CLC_VECTOR_VSTORE_PRIM1(short) \
- _CLC_VECTOR_VSTORE_PRIM1(ushort) \
- _CLC_VECTOR_VSTORE_PRIM1(int) \
- _CLC_VECTOR_VSTORE_PRIM1(uint) \
- _CLC_VECTOR_VSTORE_PRIM1(long) \
- _CLC_VECTOR_VSTORE_PRIM1(ulong) \
- _CLC_VECTOR_VSTORE_PRIM1(float) \
- _CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
+_CLC_VECTOR_VSTORE_PRIM1(char)
+_CLC_VECTOR_VSTORE_PRIM1(uchar)
+_CLC_VECTOR_VSTORE_PRIM1(short)
+_CLC_VECTOR_VSTORE_PRIM1(ushort)
+_CLC_VECTOR_VSTORE_PRIM1(int)
+_CLC_VECTOR_VSTORE_PRIM1(uint)
+_CLC_VECTOR_VSTORE_PRIM1(long)
+_CLC_VECTOR_VSTORE_PRIM1(ulong)
+_CLC_VECTOR_VSTORE_PRIM1(float)
+_CLC_VECTOR_VSTORE_PRIM3(_half, half, float)
+// Use suffix to declare aligned vstorea_halfN
+_CLC_VECTOR_VSTORE_PRIM3(a_half, half, float)
#ifdef cl_khr_fp64
_CLC_VECTOR_VSTORE_PRIM1(double)
_CLC_VECTOR_VSTORE_PRIM3(_half, half, double)
+ // Use suffix to declare aligned vstorea_halfN
+ _CLC_VECTOR_VSTORE_PRIM3(a_half, half, double)
+
_CLC_VSTORE_DECL(_half, half, double, , __private)
_CLC_VSTORE_DECL(_half, half, double, , __local)
_CLC_VSTORE_DECL(_half, half, double, , __global)
@@ -40,7 +44,6 @@
_CLC_VECTOR_VSTORE_PRIM1(half)
#endif
-_CLC_VECTOR_VSTORE_PRIM()
_CLC_VSTORE_DECL(_half, half, float, , __private)
_CLC_VSTORE_DECL(_half, half, float, , __local)
_CLC_VSTORE_DECL(_half, half, float, , __global)
@@ -49,4 +52,3 @@ _CLC_VSTORE_DECL(_half, half, float, , __global)
#undef _CLC_VECTOR_VSTORE_DECL
#undef _CLC_VECTOR_VSTORE_PRIM3
#undef _CLC_VECTOR_VSTORE_PRIM1
-#undef _CLC_VECTOR_VSTORE_PRIM
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index 3343c16..e5383a8 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -33,23 +33,22 @@
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __local) \
VSTORE_VECTORIZE(__CLC_SCALAR___CLC_GENTYPE, __global) \
-#define VSTORE_TYPES() \
- VSTORE_ADDR_SPACES(char) \
- VSTORE_ADDR_SPACES(uchar) \
- VSTORE_ADDR_SPACES(short) \
- VSTORE_ADDR_SPACES(ushort) \
- VSTORE_ADDR_SPACES(int) \
- VSTORE_ADDR_SPACES(uint) \
- VSTORE_ADDR_SPACES(long) \
- VSTORE_ADDR_SPACES(ulong) \
- VSTORE_ADDR_SPACES(float) \
+VSTORE_ADDR_SPACES(char)
+VSTORE_ADDR_SPACES(uchar)
+VSTORE_ADDR_SPACES(short)
+VSTORE_ADDR_SPACES(ushort)
+VSTORE_ADDR_SPACES(int)
+VSTORE_ADDR_SPACES(uint)
+VSTORE_ADDR_SPACES(long)
+VSTORE_ADDR_SPACES(ulong)
+VSTORE_ADDR_SPACES(float)
-VSTORE_TYPES()
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
VSTORE_ADDR_SPACES(double)
#endif
+
#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
VSTORE_ADDR_SPACES(half)
@@ -95,13 +94,17 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
VEC_STORE8(STYPE, AS, val.lo) \
VEC_STORE8(STYPE, AS, val.hi)
-#define __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) \
+#define __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) \
_CLC_OVERLOAD _CLC_DEF void vstore_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
offset *= VEC_SIZE; \
VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
+ } \
+ _CLC_OVERLOAD _CLC_DEF void vstorea_half##SUFFIX(TYPE vec, size_t offset, AS half *mem) { \
+ offset *= OFFSET; \
+ VEC_STORE##VEC_SIZE(STYPE, AS, vec) \
}
-#define FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, TYPE, STYPE, AS)
+#define FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS) __FUNC(SUFFIX, VEC_SIZE, OFFSET, TYPE, STYPE, AS)
#define __CLC_BODY "vstore_half.inc"
#include <clc/math/gentype.inc>
@@ -115,6 +118,5 @@ DECLARE_HELPER(double, __local, __builtin_store_half);
#undef VEC_LOAD2
#undef VEC_LOAD1
#undef DECLARE_HELPER
-#undef VSTORE_TYPES
#undef VSTORE_ADDR_SPACES
#undef VSTORE_VECTORIZE
diff --git a/generic/lib/shared/vstore_half.inc b/generic/lib/shared/vstore_half.inc
index fee52bc..ee4e38b 100644
--- a/generic/lib/shared/vstore_half.inc
+++ b/generic/lib/shared/vstore_half.inc
@@ -1,10 +1,19 @@
#ifdef __CLC_VECSIZE
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
- FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
+
+#if __CLC_VECSIZE == 3
+# define __CLC_OFFSET 4
+#else
+# define __CLC_OFFSET __CLC_VECSIZE
+#endif
+
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
+ FUNC(__CLC_VECSIZE, __CLC_VECSIZE, __CLC_OFFSET, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
+
+#undef __CLC_OFFSET
#else
- FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
- FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
- FUNC(, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
+ FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __private);
+ FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __local);
+ FUNC(, 1, 1, __CLC_GENTYPE, __CLC_SCALAR_GENTYPE, __global);
#endif
--
2.13.5
More information about the Libclc-dev
mailing list