[LLVMdev] Vector troubles
Chuck Rose III
cfr at adobe.com
Fri Sep 28 14:31:01 PDT 2007
Hola LLVMers,
I'm working on engaging SSE via the LLVM vector ops on x86. I had some
questions a while back that you all helped out on, but I'm seeing
similar issues and was hoping you'd have some ideas. Below is the dump
of the LLVM IR of a program which is designed to take a vector stored in
a float*, build an LLVM vector from it, copy it to another vector, and
then take it apart and store it back out in another float*. This will
live on the boundary of our system and would be a function designed to
promote a raw, potentially unaligned, value into a vector that the LLVM
system can then work with extensively.
It is dying trying to store our working vector into one of the LLVM
vectors created on the stack. Despite the align-16 directive on the
alloca instruction, it is not always aligning to a 16-byte boundary.
I did a sync and build this morning, so my LLVM is quite fresh.
Thank you for any help!
Chuck.
My program:
target datalayout =
"E-p:32:32:32-i1:8:8:8-i8:8:8:8-i32:32:32:32-f32:32:32:32"
define void @promoteCopyAndReturn(float* %promoteReturn, float*
%toPromote) {
Entry:
%Promoted_promoteReturn_Ptr = alloca <4 x float>, align 16
; <<4 x float>*> [#uses=2]
%Promoted_toPromote_Ptr = alloca <4 x float>, align 16
; <<4 x float>*> [#uses=2]
%elemPtr = getelementptr float* %toPromote, i32 0
; <float*> [#uses=1]
%elemLoaded = load float* %elemPtr ; <float>
[#uses=1]
%vectorPromotion = insertelement <4 x float> undef, float
%elemLoaded, i32 0 ; <<4 x float>> [#uses=1]
%elemPtr1 = getelementptr float* %toPromote, i32 1
; <float*> [#uses=1]
%elemLoaded2 = load float* %elemPtr1 ; <float>
[#uses=1]
%vectorPromotion3 = insertelement <4 x float> %vectorPromotion,
float %elemLoaded2, i32 1 ; <<4 x float>> [#uses=1]
%elemPtr4 = getelementptr float* %toPromote, i32 2
; <float*> [#uses=1]
%elemLoaded5 = load float* %elemPtr4 ; <float>
[#uses=1]
%vectorPromotion6 = insertelement <4 x float> %vectorPromotion3,
float %elemLoaded5, i32 2 ; <<4 x float>> [#uses=1]
%elemPtr7 = getelementptr float* %toPromote, i32 3
; <float*> [#uses=1]
%elemLoaded8 = load float* %elemPtr7 ; <float>
[#uses=1]
%vectorPromotion9 = insertelement <4 x float> %vectorPromotion6,
float %elemLoaded8, i32 3 ; <<4 x float>> [#uses=1]
store <4 x float> %vectorPromotion9, <4 x float>*
%Promoted_toPromote_Ptr <<<<<<<<-------- dying when it executes this
line (assembly below)
%toPromote10 = load <4 x float>* %Promoted_toPromote_Ptr
; <<4 x float>> [#uses=1]
br label %Body
Body: ; preds = %Entry
store <4 x float> %toPromote10, <4 x float>*
%Promoted_promoteReturn_Ptr
br label %Exit
Exit: ; preds = %Body
%vectorToDemote = load <4 x float>* %Promoted_promoteReturn_Ptr
; <<4 x float>> [#uses=4]
%elemToDemote = extractelement <4 x float> %vectorToDemote, i32
0 ; <float> [#uses=1]
%elemPtr11 = getelementptr float* %promoteReturn, i32 0
; <float*> [#uses=1]
store float %elemToDemote, float* %elemPtr11
%elemToDemote12 = extractelement <4 x float> %vectorToDemote,
i32 1 ; <float> [#uses=1]
%elemPtr13 = getelementptr float* %promoteReturn, i32 1
; <float*> [#uses=1]
store float %elemToDemote12, float* %elemPtr13
%elemToDemote14 = extractelement <4 x float> %vectorToDemote,
i32 2 ; <float> [#uses=1]
%elemPtr15 = getelementptr float* %promoteReturn, i32 2
; <float*> [#uses=1]
store float %elemToDemote14, float* %elemPtr15
%elemToDemote16 = extractelement <4 x float> %vectorToDemote,
i32 3 ; <float> [#uses=1]
%elemPtr17 = getelementptr float* %promoteReturn, i32 3
; <float*> [#uses=1]
store float %elemToDemote16, float* %elemPtr17
ret void
}
Assembler (intel format):
15c00010 83ec2c sub esp,2Ch
15c00013 8b442434 mov eax,dword ptr [esp+34h]
15c00017 f30f10400c movss xmm0,dword ptr [eax+0Ch]
15c0001c f30f104804 movss xmm1,dword ptr [eax+4]
15c00021 0f14c8 unpcklps xmm1,xmm0
15c00024 f30f104008 movss xmm0,dword ptr [eax+8]
15c00029 f30f1010 movss xmm2,dword ptr [eax]
15c0002d 0f14d0 unpcklps xmm2,xmm0
15c00030 0f14d1 unpcklps xmm2,xmm1
15c00033 0f291424 movaps xmmword ptr [esp],xmm2
ss:0023:0012f238=0012f2580122ef730000000100000000
The relevant registers:
Xmm2 8.000000e+000: 4.000000e+000: 2.000000e+000: 1.000000e+000 //
the vector got nicely constructed
Esp 12f238 // but esp is only 8-byte aligned (0x12f238 mod 16 = 8), so the
movaps store has no place to go and throws a general-protection exception.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20070928/39ffec5c/attachment.html>
More information about the llvm-dev
mailing list