[Libclc-dev] [PATCH 1/3] vload/vstore: Use casts instead of scalarizing everything in CLC version

Erik Schnetter schnetter at gmail.com
Sat Jul 26 07:02:50 PDT 2014


On Jul 26, 2014, at 0:22 , Tom Stellard <tom at stellard.net> wrote:

> On Fri, Jul 25, 2014 at 06:15:52PM -0500, Aaron Watry wrote:
>> This generates bitcode which is indistinguishable from what was
>> hand-written for int32 types in v[load|store]_impl.ll
>> 
> 
> The LLVM IR produced by these implementations is incorrect.  The
> alignment on the loads and stores needs to be the size of the base type.
> So, the load produced by an int2 vload should have an alignment of 4
> bytes.
> 
> You may already have something like this, but here is the command I used to
> compile vload.cl to LLVM IR to verify the alignment.
> 
> clang -S -emit-llvm -o $1.ll -include /usr/local/include/clc/clc.h
> -I/usr/local//include/ -Dcl_clang_storage_class_specifiers -target r600
> -mcpu=verde -c $1
> 
> To get the correct alignment you'll want to do something like:
> 
> int2 vload2(size_t offset, local int *ptr) {
>        ptr += offset * 2;
>        return (int2)(ptr[0], ptr[1]);
> }

We are using this in pocl:

TYPE##2 _CL_OVERLOADABLE
vload2(size_t offset, const MOD TYPE *p)
{
  return (TYPE##2)(p[offset*2], p[offset*2+1]);
}

which is essentially the same as you suggest.

-erik

>> Signed-off-by: Aaron Watry <awatry at gmail.com>
>> ---
>> generic/lib/shared/vload.cl  | 10 +++++-----
>> generic/lib/shared/vstore.cl | 16 +++++-----------
>> 2 files changed, 10 insertions(+), 16 deletions(-)
>> 
>> diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
>> index 6793072..c6ea683 100644
>> --- a/generic/lib/shared/vload.cl
>> +++ b/generic/lib/shared/vload.cl
>> @@ -2,23 +2,23 @@
>> 
>> #define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
>>   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>> -    return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
>> +    return *((const ADDR_SPACE PRIM_TYPE##2*)(&x[2*offset])); \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>> -    return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
>> +    return *((const ADDR_SPACE PRIM_TYPE##3*)(&x[3*offset])); \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>> -    return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
>> +    return *((const ADDR_SPACE PRIM_TYPE##4*)(&x[4*offset])); \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>> -    return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
>> +    return *((const ADDR_SPACE PRIM_TYPE##8*)(&x[8*offset])); \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>> -    return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
>> +    return *((const ADDR_SPACE PRIM_TYPE##16*)(&x[16*offset])); \
>>   } \
>> 
>> #define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
>> diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
>> index f6d360e..9cb35ad 100644
>> --- a/generic/lib/shared/vstore.cl
>> +++ b/generic/lib/shared/vstore.cl
>> @@ -4,29 +4,23 @@
>> 
>> #define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
>>   _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>> -    mem[2*offset] = vec.s0; \
>> -    mem[2*offset+1] = vec.s1; \
>> +    *((ADDR_SPACE PRIM_TYPE##2*)(&mem[2*offset])) = vec; \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>> -    mem[3*offset] = vec.s0; \
>> -    mem[3*offset+1] = vec.s1; \
>> -    mem[3*offset+2] = vec.s2; \
>> +    *((ADDR_SPACE PRIM_TYPE##3*)(&mem[3*offset])) = vec; \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>> -    vstore2(vec.lo, 0, &mem[offset*4]); \
>> -    vstore2(vec.hi, 1, &mem[offset*4]); \
>> +    *((ADDR_SPACE PRIM_TYPE##4*)(&mem[4*offset])) = vec; \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>> -    vstore4(vec.lo, 0, &mem[offset*8]); \
>> -    vstore4(vec.hi, 1, &mem[offset*8]); \
>> +    *((ADDR_SPACE PRIM_TYPE##8*)(&mem[8*offset])) = vec; \
>>   } \
>> \
>>   _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>> -    vstore8(vec.lo, 0, &mem[offset*16]); \
>> -    vstore8(vec.hi, 1, &mem[offset*16]); \
>> +    *((ADDR_SPACE PRIM_TYPE##16*)(&mem[16*offset])) = vec; \
>>   } \
>> 
>> #define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
>> -- 
>> 1.9.1
>> 
>> 
>> _______________________________________________
>> Libclc-dev mailing list
>> Libclc-dev at pcc.me.uk
>> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev

-- 
Erik Schnetter <schnetter at gmail.com>
http://www.perimeterinstitute.ca/personal/eschnetter/

My email is as private as my paper mail. I therefore support encrypting
and signing email messages. Get my PGP key from http://pgp.mit.edu/.

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 203 bytes
Desc: Message signed with OpenPGP using GPGMail
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20140726/71a1edb3/attachment.sig>


More information about the Libclc-dev mailing list