[Libclc-dev] [PATCH 1/3] vload/vstore: Use casts instead of scalarizing everything in CLC version

Matt Arsenault arsenm2 at gmail.com
Fri Aug 8 22:18:59 PDT 2014


On Jul 26, 2014, at 7:02 AM, Erik Schnetter <schnetter at gmail.com> wrote:

> On Jul 26, 2014, at 0:22 , Tom Stellard <tom at stellard.net> wrote:
> 
>> On Fri, Jul 25, 2014 at 06:15:52PM -0500, Aaron Watry wrote:
>>> This generates bitcode which is indistinguishable from what was
>>> hand-written for int32 types in v[load|store]_impl.ll
>>> 
>> 
>> The LLVM IR produced by these implementations is incorrect.  The
>> alignment on the loads and stores needs to be the size of the base type.
>> So, the load produced by an int2 vload should have an alignment of 4
>> bytes.
>> 
>> You may already have something like this, but here is the command I used to
>> compile vload.cl to LLVM IR to verify the alignment.
>> 
>> clang -S -emit-llvm -o $1.ll -include /usr/local/include/clc/clc.h
>> -I/usr/local//include/ -Dcl_clang_storage_class_specifiers -target r600
>> -mcpu=verde -c $1
>> 
>> To get the correct alignment you'll want to do something like:
>> 
>> int2 vload2(size_t offset, local int *ptr) {
>>       ptr += offset * 2;
>>       return (int2)(ptr[0], ptr[1]);
>> }
> 
> We are using this in pocl:
> 
> TYPE##2 _CL_OVERLOADABLE
> vload2(size_t offset, const MOD TYPE *p)
> {
>  return (TYPE##2)(p[offset*2], p[offset*2+1]);
> }
> 
> which is essentially the same as you suggest.
> 
> -erik
> 

I think this will result in IR with 2 loads in it. The IR from these functions should be a single instruction with the alignment specified to be the element type’s alignment, and I don’t think anything will optimize the separate scalar accesses into that single instruction. The current implementation results in an extractelement + store for each element.


Something like this:
typedef float2 less_aligned_float2 __attribute__ ((aligned (4)));
void test_vstore2(local float* ptr, float2 val)
{
    *((local less_aligned_float2*) ptr) = val;
}

Gives the expected IR of a single vector store with the right alignment:

define void @test_vstore2(float addrspace(3)* nocapture %ptr, <2 x float> %val) #0 {
entry:
  %0 = bitcast float addrspace(3)* %ptr to <2 x float> addrspace(3)*
  store <2 x float> %val, <2 x float> addrspace(3)* %0, align 4, !tbaa !1
  ret void
}




>>> Signed-off-by: Aaron Watry <awatry at gmail.com>
>>> ---
>>> generic/lib/shared/vload.cl  | 10 +++++-----
>>> generic/lib/shared/vstore.cl | 16 +++++-----------
>>> 2 files changed, 10 insertions(+), 16 deletions(-)
>>> 
>>> diff --git a/generic/lib/shared/vload.cl b/generic/lib/shared/vload.cl
>>> index 6793072..c6ea683 100644
>>> --- a/generic/lib/shared/vload.cl
>>> +++ b/generic/lib/shared/vload.cl
>>> @@ -2,23 +2,23 @@
>>> 
>>> #define VLOAD_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
>>>  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##2 vload2(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>>> -    return (PRIM_TYPE##2)(x[2*offset] , x[2*offset+1]); \
>>> +    return *((const ADDR_SPACE PRIM_TYPE##2*)(&x[2*offset])); \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##3 vload3(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>>> -    return (PRIM_TYPE##3)(x[3*offset] , x[3*offset+1], x[3*offset+2]); \
>>> +    return *((const ADDR_SPACE PRIM_TYPE##3*)(&x[3*offset])); \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##4 vload4(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>>> -    return (PRIM_TYPE##4)(x[4*offset], x[4*offset+1], x[4*offset+2], x[4*offset+3]); \
>>> +    return *((const ADDR_SPACE PRIM_TYPE##4*)(&x[4*offset])); \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##8 vload8(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>>> -    return (PRIM_TYPE##8)(vload4(0, &x[8*offset]), vload4(1, &x[8*offset])); \
>>> +    return *((const ADDR_SPACE PRIM_TYPE##8*)(&x[8*offset])); \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF PRIM_TYPE##16 vload16(size_t offset, const ADDR_SPACE PRIM_TYPE *x) { \
>>> -    return (PRIM_TYPE##16)(vload8(0, &x[16*offset]), vload8(1, &x[16*offset])); \
>>> +    return *((const ADDR_SPACE PRIM_TYPE##16*)(&x[16*offset])); \
>>>  } \
>>> 
>>> #define VLOAD_ADDR_SPACES(__CLC_SCALAR_GENTYPE) \
>>> diff --git a/generic/lib/shared/vstore.cl b/generic/lib/shared/vstore.cl
>>> index f6d360e..9cb35ad 100644
>>> --- a/generic/lib/shared/vstore.cl
>>> +++ b/generic/lib/shared/vstore.cl
>>> @@ -4,29 +4,23 @@
>>> 
>>> #define VSTORE_VECTORIZE(PRIM_TYPE, ADDR_SPACE) \
>>>  _CLC_OVERLOAD _CLC_DEF void vstore2(PRIM_TYPE##2 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>>> -    mem[2*offset] = vec.s0; \
>>> -    mem[2*offset+1] = vec.s1; \
>>> +    *((ADDR_SPACE PRIM_TYPE##2*)(&mem[2*offset])) = vec; \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF void vstore3(PRIM_TYPE##3 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>>> -    mem[3*offset] = vec.s0; \
>>> -    mem[3*offset+1] = vec.s1; \
>>> -    mem[3*offset+2] = vec.s2; \
>>> +    *((ADDR_SPACE PRIM_TYPE##3*)(&mem[3*offset])) = vec; \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF void vstore4(PRIM_TYPE##4 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>>> -    vstore2(vec.lo, 0, &mem[offset*4]); \
>>> -    vstore2(vec.hi, 1, &mem[offset*4]); \
>>> +    *((ADDR_SPACE PRIM_TYPE##4*)(&mem[4*offset])) = vec; \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF void vstore8(PRIM_TYPE##8 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>>> -    vstore4(vec.lo, 0, &mem[offset*8]); \
>>> -    vstore4(vec.hi, 1, &mem[offset*8]); \
>>> +    *((ADDR_SPACE PRIM_TYPE##8*)(&mem[8*offset])) = vec; \
>>>  } \
>>> \
>>>  _CLC_OVERLOAD _CLC_DEF void vstore16(PRIM_TYPE##16 vec, size_t offset, ADDR_SPACE PRIM_TYPE *mem) { \
>>> -    vstore8(vec.lo, 0, &mem[offset*16]); \
>>> -    vstore8(vec.hi, 1, &mem[offset*16]); \
>>> +    *((ADDR_SPACE PRIM_TYPE##16*)(&mem[16*offset])) = vec; \
>>>  } \
>>> 
>>> #define VSTORE_ADDR_SPACES(__CLC_SCALAR___CLC_GENTYPE) \
>>> -- 
>>> 1.9.1
>>> 
>>> 
>>> _______________________________________________
>>> Libclc-dev mailing list
>>> Libclc-dev at pcc.me.uk
>>> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
>> 
>> _______________________________________________
>> Libclc-dev mailing list
>> Libclc-dev at pcc.me.uk
>> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev
> 
> -- 
> Erik Schnetter <schnetter at gmail.com>
> http://www.perimeterinstitute.ca/personal/eschnetter/
> 
> My email is as private as my paper mail. I therefore support encrypting
> and signing email messages. Get my PGP key from http://pgp.mit.edu/.
> 
> _______________________________________________
> Libclc-dev mailing list
> Libclc-dev at pcc.me.uk
> http://www.pcc.me.uk/cgi-bin/mailman/listinfo/libclc-dev

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/libclc-dev/attachments/20140808/3ed8c9ef/attachment.html>


More information about the Libclc-dev mailing list