[Libclc-dev] [PATCH 1/3] vload/vstore: Use casts instead of scalarizing everything in CLC version
Aaron Watry
awatry at gmail.com
Fri Jul 25 16:15:52 PDT 2014
This generates bitcode that is indistinguishable from the hand-written
int32 implementations in vload_impl.ll and vstore_impl.ll.
Signed-off-by: Aaron Watry <awatry at gmail.com>
---
generic/lib/shared/vload.cl | 10 +++++-----
generic/lib/shared/vstore.cl | 16 +++++-----------
2 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
index 6793072..c6ea683 100644
--- a/generic/lib/shared/vload.cl
+++ b/generic/lib/shared/vload.cl
@@ -2,23 +2,23 @@
#define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
- return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
+ return *((const ADDR_SPACE PRIM_TYPE##2*)(&x[2*offset])); \
} \
\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
- return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
+ return *((const ADDR_SPACE PRIM_TYPE##3*)(&x[3*offset])); \
} \
\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
- return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
+ return *((const ADDR_SPACE PRIM_TYPE##4*)(&x[4*offset])); \
} \
\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
- return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
+ return *((const ADDR_SPACE PRIM_TYPE##8*)(&x[8*offset])); \
} \
\
_CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
- return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
+ return *((const ADDR_SPACE PRIM_TYPE##16*)(&x[16*offset])); \
} \
#define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index f6d360e..9cb35ad 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -4,29 +4,23 @@
#define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
_CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
- mem[2*offset] = vec.s0; \
- mem[2*offset+1] = vec.s1; \
+ *((ADDR_SPACE PRIM_TYPE##2*)(&mem[2*offset])) = vec; \
} \
\
_CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
- mem[3*offset] = vec.s0; \
- mem[3*offset+1] = vec.s1; \
- mem[3*offset+2] = vec.s2; \
+ *((ADDR_SPACE PRIM_TYPE##3*)(&mem[3*offset])) = vec; \
} \
\
_CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
- vstore2(vec.lo, 0, &mem[offset*4]); \
- vstore2(vec.hi, 1, &mem[offset*4]); \
+ *((ADDR_SPACE PRIM_TYPE##4*)(&mem[4*offset])) = vec; \
} \
\
_CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
- vstore4(vec.lo, 0, &mem[offset*8]); \
- vstore4(vec.hi, 1, &mem[offset*8]); \
+ *((ADDR_SPACE PRIM_TYPE##8*)(&mem[8*offset])) = vec; \
} \
\
_CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
- vstore8(vec.lo, 0, &mem[offset*16]); \
- vstore8(vec.hi, 1, &mem[offset*16]); \
+ *((ADDR_SPACE PRIM_TYPE##16*)(&mem[16*offset])) = vec; \
} \
#define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
--
1.9.1
More information about the Libclc-dev
mailing list