[LLVMdev] Alignment of vectors

Fri Jul 18 06:45:02 PDT 2008

Consider the following C code:

typedef __attribute__(( ext_vector_type(2) )) float float2;
typedef __attribute__(( ext_vector_type(2) )) __attribute__(( aligned(4) ))
float float2_align2;

void foo(void)
{
  const float * p;
  size_t offset;
  float2 tmp = *((float2_align2 *)(p+offset));
}

When compiled with clang -emit-llvm, I get:

define void @foo() {
entry:
    %p = alloca float*, align 4        ; <float**> [#uses=1]
    %offset = alloca i32, align 4        ; <i32*> [#uses=1]
    %tmp = alloca <2 x float>, align 8        ; <<2 x float>*> [#uses=1]
    %tmp1 = load float** %p        ; <float*> [#uses=1]
    %tmp2 = load i32* %offset        ; <i32> [#uses=1]
    %add.ptr = getelementptr float* %tmp1, i32 %tmp2        ; <float*>
[#uses=1]
    %conv = bitcast float* %add.ptr to <2 x float>*        ; <<2 x float>*>
[#uses=1]
    %tmp3 = load <2 x float>* %conv        ; <<2 x float>> [#uses=1]
    store <2 x float> %tmp3, <2 x float>* %tmp
    ret void
}

The problem is that the load into %tmp3 seems to have lost the information
that %conv should be assumed to be aligned to only 4 bytes rather than 8. Of
course, GCC only states that the alignment attribute will try to enforce a
minimum alignment, so the above code generated by clang is valid. But what
if the following code had been generated instead:

define void @foo() {
entry:
    %p = alloca float*, align 4        ; <float**> [#uses=1]
    %offset = alloca i32, align 4        ; <i32*> [#uses=1]
    %tmp = alloca <2 x float>, align 4        ; <<2 x float>*> [#uses=1]
    %tmp1 = load float** %p        ; <float*> [#uses=1]
    %tmp2 = load i32* %offset        ; <i32> [#uses=1]
    %add.ptr = getelementptr float* %tmp1, i32 %tmp2        ; <float*>
[#uses=1]
    %conv = bitcast float* %add.ptr to <2 x float>*        ; <<2 x float>*>
[#uses=1]
    %tmp3 = load <2 x float>* %conv        ; <<2 x float>> [#uses=1]
    store <2 x float> %tmp3, <2 x float>* %tmp
    ret void
}

I'm assuming that, in general, this is not correct, as the code:

    %tmp3 = load <2 x float>* %conv        ; <<2 x float>> [#uses=1]

lacks the now-necessary information that %conv is unaligned, and instead must
be written as:

    %tmp3 = load <2 x float>*%conv, align 2         ; <<2 x float>>, align2
[#uses=1]

Of course, it is now up to the backend to ensure that an unaligned load is
handled correctly in hardware, but assuming that is OK, is this approach
correct?

Thanks,

Ben
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20080718/250621b1/attachment.html>