[Libclc-dev] [PATCH 1/3] vload/vstore: Use casts instead of scalarizing everything in CLC version

Aaron Watry awatry at gmail.com
Fri Jul 25 16:15:52 PDT 2014


Casting the scalar pointer to a vector pointer, instead of scalarizing each
access, generates bitcode which is indistinguishable from what was
hand-written for int32 types in v[load|store]_impl.ll

Signed-off-by: Aaron Watry <awatry at gmail.com>
---
 generic/lib/shared/vload.cl  | 10 +++++-----
 generic/lib/shared/vstore.cl | 16 +++++-----------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
index 6793072..c6ea683 100644
--- a/generic/lib/shared/vload.cl
+++ b/generic/lib/shared/vload.cl
@@ -2,23 +2,23 @@
 
 #define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
-    return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
+    return *((const ADDR_SPACE PRIM_TYPE##2*)(&x[2*offset])); \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
-    return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
+    return *((const ADDR_SPACE PRIM_TYPE##3*)(&x[3*offset])); \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
-    return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
+    return *((const ADDR_SPACE PRIM_TYPE##4*)(&x[4*offset])); \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
-    return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
+    return *((const ADDR_SPACE PRIM_TYPE##8*)(&x[8*offset])); \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
-    return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
+    return *((const ADDR_SPACE PRIM_TYPE##16*)(&x[16*offset])); \
   } \
 
 #define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
index f6d360e..9cb35ad 100644
--- a/generic/lib/shared/vstore.cl
+++ b/generic/lib/shared/vstore.cl
@@ -4,29 +4,23 @@
 
 #define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
   _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
-    mem[2*offset] = vec.s0; \
-    mem[2*offset+1] = vec.s1; \
+    *((ADDR_SPACE PRIM_TYPE##2*)(&mem[2*offset])) = vec; \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
-    mem[3*offset] = vec.s0; \
-    mem[3*offset+1] = vec.s1; \
-    mem[3*offset+2] = vec.s2; \
+    *((ADDR_SPACE PRIM_TYPE##3*)(&mem[3*offset])) = vec; \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
-    vstore2(vec.lo, 0, &mem[offset*4]); \
-    vstore2(vec.hi, 1, &mem[offset*4]); \
+    *((ADDR_SPACE PRIM_TYPE##4*)(&mem[4*offset])) = vec; \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
-    vstore4(vec.lo, 0, &mem[offset*8]); \
-    vstore4(vec.hi, 1, &mem[offset*8]); \
+    *((ADDR_SPACE PRIM_TYPE##8*)(&mem[8*offset])) = vec; \
   } \
 \
   _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
-    vstore8(vec.lo, 0, &mem[offset*16]); \
-    vstore8(vec.hi, 1, &mem[offset*16]); \
+    *((ADDR_SPACE PRIM_TYPE##16*)(&mem[16*offset])) = vec; \
   } \
 
 #define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
-- 
1.9.1





More information about the Libclc-dev mailing list