[Libclc-dev] [PATCH 1/3] vload/vstore: Use casts instead of scalarizing everything in CLC version

Tom Stellard tom at stellard.net
Fri Jul 25 22:22:26 PDT 2014


On Fri, Jul 25, 2014 at 06:15:52PM -0500, Aaron Watry wrote:
> This generates bitcode which is indistinguishable from what was
> hand-written for int32 types in v[load|store]_impl.ll
>

The LLVM IR produced by these implementations is incorrect.  The
alignment on the loads and stores needs to be the size of the base type.
So, the load produced by an int2 vload should have an alignment of 4
bytes.

You may already have something like this, but here is the command I used to
compile vload.cl to LLVM IR to verify the alignment.

clang -S -emit-llvm -o $1.ll -include /usr/local/include/clc/clc.h
-I/usr/local//include/ -Dcl_clang_storage_class_specifiers -target r600
-mcpu=verde -c $1

To get the correct alignment you'll want to do something like:

/* Example vload2 for local int pointers: builds the int2 from two scalar
 * element reads instead of dereferencing a casted vector pointer.
 * NOTE(review): the intent, per the explanation above, is that each load
 * is then emitted with the base type's alignment (4 bytes for int) --
 * confirm by inspecting the generated LLVM IR. */
int2 vload2(size_t offset, local int *ptr) {
        /* Advance to the first element of the offset-th 2-element group. */
        ptr += offset * 2;
        /* Read the two elements individually and pack them into an int2. */
        return (int2)(ptr[0], ptr[1]);
}

-Tom

> Signed-off-by: Aaron Watry <awatry at gmail.com>
> ---
>  generic/lib/shared/vload.cl  | 10 +++++-----
>  generic/lib/shared/vstore.cl | 16 +++++-----------
>  2 files changed, 10 insertions(+), 16 deletions(-)
> 
> diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
> index 6793072..c6ea683 100644
> --- a/generic/lib/shared/vload.cl
> +++ b/generic/lib/shared/vload.cl
> @@ -2,23 +2,23 @@
>  
>  #define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
>    _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
> -    return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
> +    return *((const ADDR_SPACE PRIM_TYPE##2*)(&x[2*offset])); \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
> -    return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
> +    return *((const ADDR_SPACE PRIM_TYPE##3*)(&x[3*offset])); \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
> -    return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
> +    return *((const ADDR_SPACE PRIM_TYPE##4*)(&x[4*offset])); \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
> -    return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
> +    return *((const ADDR_SPACE PRIM_TYPE##8*)(&x[8*offset])); \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
> -    return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
> +    return *((const ADDR_SPACE PRIM_TYPE##16*)(&x[16*offset])); \
>    } \
>  
>  #define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
> diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
> index f6d360e..9cb35ad 100644
> --- a/generic/lib/shared/vstore.cl
> +++ b/generic/lib/shared/vstore.cl
> @@ -4,29 +4,23 @@
>  
>  #define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
>    _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
> -    mem[2*offset] = vec.s0; \
> -    mem[2*offset+1] = vec.s1; \
> +    *((ADDR_SPACE PRIM_TYPE##2*)(&mem[2*offset])) = vec; \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
> -    mem[3*offset] = vec.s0; \
> -    mem[3*offset+1] = vec.s1; \
> -    mem[3*offset+2] = vec.s2; \
> +    *((ADDR_SPACE PRIM_TYPE##3*)(&mem[3*offset])) = vec; \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
> -    vstore2(vec.lo, 0, &mem[offset*4]); \
> -    vstore2(vec.hi, 1, &mem[offset*4]); \
> +    *((ADDR_SPACE PRIM_TYPE##4*)(&mem[4*offset])) = vec; \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
> -    vstore4(vec.lo, 0, &mem[offset*8]); \
> -    vstore4(vec.hi, 1, &mem[offset*8]); \
> +    *((ADDR_SPACE PRIM_TYPE##8*)(&mem[8*offset])) = vec; \
>    } \
>  \
>    _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
> -    vstore8(vec.lo, 0, &mem[offset*16]); \
> -    vstore8(vec.hi, 1, &mem[offset*16]); \
> +    *((ADDR_SPACE PRIM_TYPE##16*)(&mem[16*offset])) = vec; \
>    } \
>  
>  #define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
> -- 
> 1.9.1
> 
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev




More information about the Libclc-dev mailing list