[LLVMdev] Vector troubles

Chuck Rose III cfr at adobe.com
Fri Sep 28 14:31:01 PDT 2007


Hola LLVMers,

 

I'm working on engaging SSE via the LLVM vector ops on x86.  I had some
questions a while back that you all helped out with, but I'm seeing
similar issues again and was hoping you'd have some ideas.  Below is the
dump of the LLVM IR of a program which is designed to take a vector
stored in a float*, build an LLVM vector from it, copy it to another
vector, and then take it apart and store it back out through another
float*.  A function like this will live on the boundary of our system:
its job is to promote a raw, potentially unaligned, value into a vector
that the rest of the LLVM-generated code can then work with freely.

 

It is dying while trying to store our working vector into one of the
LLVM vectors created on the stack.  Despite the align-16 directive on
the alloca instruction, the stack slot is not always aligned to a
16-byte boundary.

 

I did a sync and build this morning, so my LLVM is quite fresh.

 

Thank you for any help!

 

Chuck.

 

My program:

 

   target datalayout =
"E-p:32:32:32-i1:8:8:8-i8:8:8:8-i32:32:32:32-f32:32:32:32"

 

   define void @promoteCopyAndReturn(float* %promoteReturn, float*
%toPromote) {

   Entry:

        %Promoted_promoteReturn_Ptr = alloca <4 x float>, align 16
; <<4 x float>*> [#uses=2]

        %Promoted_toPromote_Ptr = alloca <4 x float>, align 16
; <<4 x float>*> [#uses=2]

        %elemPtr = getelementptr float* %toPromote, i32 0
; <float*> [#uses=1]

        %elemLoaded = load float* %elemPtr              ; <float>
[#uses=1]

        %vectorPromotion = insertelement <4 x float> undef, float
%elemLoaded, i32 0            ; <<4 x float>> [#uses=1]

        %elemPtr1 = getelementptr float* %toPromote, i32 1
; <float*> [#uses=1]

        %elemLoaded2 = load float* %elemPtr1            ; <float>
[#uses=1]

        %vectorPromotion3 = insertelement <4 x float> %vectorPromotion,
float %elemLoaded2, i32 1               ; <<4 x float>> [#uses=1]

        %elemPtr4 = getelementptr float* %toPromote, i32 2
; <float*> [#uses=1]

        %elemLoaded5 = load float* %elemPtr4            ; <float>
[#uses=1]

        %vectorPromotion6 = insertelement <4 x float> %vectorPromotion3,
float %elemLoaded5, i32 2              ; <<4 x float>> [#uses=1]

        %elemPtr7 = getelementptr float* %toPromote, i32 3
; <float*> [#uses=1]

        %elemLoaded8 = load float* %elemPtr7            ; <float>
[#uses=1]

        %vectorPromotion9 = insertelement <4 x float> %vectorPromotion6,
float %elemLoaded8, i32 3              ; <<4 x float>> [#uses=1]

        store <4 x float> %vectorPromotion9, <4 x float>*
%Promoted_toPromote_Ptr    <<<<<<<<--------  dying when it executes this
line (assembly below)

        %toPromote10 = load <4 x float>* %Promoted_toPromote_Ptr
; <<4 x float>> [#uses=1]

        br label %Body

 

   Body:             ; preds = %Entry

        store <4 x float> %toPromote10, <4 x float>*
%Promoted_promoteReturn_Ptr

        br label %Exit

 

   Exit:             ; preds = %Body

        %vectorToDemote = load <4 x float>* %Promoted_promoteReturn_Ptr
; <<4 x float>> [#uses=4]

        %elemToDemote = extractelement <4 x float> %vectorToDemote, i32
0               ; <float> [#uses=1]

        %elemPtr11 = getelementptr float* %promoteReturn, i32 0
; <float*> [#uses=1]

        store float %elemToDemote, float* %elemPtr11

        %elemToDemote12 = extractelement <4 x float> %vectorToDemote,
i32 1             ; <float> [#uses=1]

        %elemPtr13 = getelementptr float* %promoteReturn, i32 1
; <float*> [#uses=1]

        store float %elemToDemote12, float* %elemPtr13

        %elemToDemote14 = extractelement <4 x float> %vectorToDemote,
i32 2             ; <float> [#uses=1]

        %elemPtr15 = getelementptr float* %promoteReturn, i32 2
; <float*> [#uses=1]

        store float %elemToDemote14, float* %elemPtr15

        %elemToDemote16 = extractelement <4 x float> %vectorToDemote,
i32 3             ; <float> [#uses=1]

        %elemPtr17 = getelementptr float* %promoteReturn, i32 3
; <float*> [#uses=1]

        store float %elemToDemote16, float* %elemPtr17

        ret void

   }

 

Assembler (intel format):

 

15c00010 83ec2c          sub     esp,2Ch

15c00013 8b442434        mov     eax,dword ptr [esp+34h]

15c00017 f30f10400c      movss   xmm0,dword ptr [eax+0Ch]

15c0001c f30f104804      movss   xmm1,dword ptr [eax+4]

15c00021 0f14c8          unpcklps xmm1,xmm0

15c00024 f30f104008      movss   xmm0,dword ptr [eax+8]

15c00029 f30f1010        movss   xmm2,dword ptr [eax]

15c0002d 0f14d0          unpcklps xmm2,xmm0

15c00030 0f14d1          unpcklps xmm2,xmm1

15c00033 0f291424        movaps  xmmword ptr [esp],xmm2
ss:0023:0012f238=0012f2580122ef730000000100000000

 

The relevant registers:

 

Xmm2 8.000000e+000: 4.000000e+000: 2.000000e+000: 1.000000e+000    //
the vector got nicely constructed

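Esp 12f238    // but it has no place to go: esp is only 8-byte aligned
(0x12f238 is not a multiple of 16), and movaps requires a 16-byte-aligned
address, so the store throws a general-protection exception.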

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20070928/39ffec5c/attachment.html>


More information about the llvm-dev mailing list