[LLVMdev] FP Intrinsics
Morten Ofstad
morten at hue.no
Fri Mar 11 08:59:48 PST 2005
Update: I have been working on this all day, and I finally got it
working more or less with the pattern instruction selector... However,
the generated code is not very good, and I haven't yet implemented the
expansion to calls for targets that do not support these FP instructions.
As an example, in the following function the sub, abs and compare
compile to 13 instructions! Also, it has changed the result of the abs
to be a 64-bit double when it stores it, while my intention was to get a
32-bit float.
For comparison, I have included the code generated by X86ISelSimple, which
is only 8 instructions... It seems the compare is being generated twice,
in two different ways, with the pattern selector?!
I have also attached the diff of my current version.
m.
internal void %EvaluatePoint3D65() {
EntryBlock:
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
%Value = call float %ReadVoxel( void* %_hVB59 ) ; <float> [#uses=1]
%arg2 = load float* %T64 ; <float> [#uses=1]
%VMCommandSubtract = sub float %Value, %arg2 ; <float> [#uses=1]
%VMCommandAbs = call float %llvm.abs( float %VMCommandSubtract ) ;
<float> [#uses=2]
%isNonZero = setgt float %VMCommandAbs, 0.000000e+000 ; <bool> [#uses=1]
br bool %isNonZero, label %NonZero, label %Zero
NonZero: ; preds = %EntryBlock
call void %Shader1DLookupLinear( <4 x float>* %_ARGB56, void* %_hS60,
float %VMCommandAbs, void* %_hContext3D58 )
ret void
Zero: ; preds = %EntryBlock
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4
x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
ret void
}
Generated with ISelPattern:
17160410 sub esp,1Ch
17160413 mov dword ptr ds:[161D6240h],0
1716041D mov dword ptr ds:[161D6244h],0
17160427 mov dword ptr ds:[161D6248h],0
17160431 mov dword ptr ds:[161D624Ch],0
1716043B mov eax,76E4560h
17160440 mov dword ptr [esp],eax
17160443 call HueVMReadCommands_LLVMReadVoxel (19BB229h)
17160448 fsub dword ptr ds:[161D6280h]
1716044E fabs
17160450 fst qword ptr [esp+14h]
17160454 ftst
17160456 fstp st(0)
17160458 fnstsw ax
1716045A sahf
1716045B fldz
1716045D fchs
1716045F fld qword ptr [esp+14h]
17160463 fucomip st,st(1)
17160465 fstp st(0)
17160467 jbe 17160498
1716046D mov eax,76E4F60h
17160472 mov dword ptr [esp+0Ch],eax
17160476 fld qword ptr [esp+14h]
1716047A fstp dword ptr [esp+8]
1716047E mov eax,15900060h
17160483 mov dword ptr [esp+4],eax
17160487 mov eax,161D6240h
1716048C mov dword ptr [esp],eax
1716048F call HueVMShaderCommands_LLVMShader1DLookupLinear
(19D8B76h)
17160494 add esp,1Ch
17160497 ret
17160498 mov dword ptr ds:[161D6240h],0
171604A2 mov dword ptr ds:[161D6244h],0
171604AC mov dword ptr ds:[161D6248h],0
171604B6 mov dword ptr ds:[161D624Ch],0
171604C0 add esp,1Ch
171604C3 ret
Generated with ISelSimple:
17160440 sub esp,1Ch
17160443 mov eax,16237200h
17160448 mov dword ptr [eax],0
1716044E mov eax,16237200h
17160453 mov dword ptr [eax+4],0
1716045A mov eax,16237200h
1716045F mov dword ptr [eax+8],0
17160466 mov eax,16237200h
1716046B mov dword ptr [eax+0Ch],0
17160472 mov eax,19BB229h
17160477 mov ecx,76E4560h
1716047C mov dword ptr [esp],ecx
1716047F call eax
17160481 fsub dword ptr ds:[16237240h]
17160487 fabs
17160489 fst qword ptr [esp+14h]
1716048D ftst
1716048F fstp st(0)
17160491 fnstsw ax
17160493 sahf
17160494 jbe 171604C7
1716049A mov eax,19D8B76h
1716049F mov ecx,16237200h
171604A4 mov dword ptr [esp],ecx
171604A7 mov ecx,15900060h
171604AC mov dword ptr [esp+4],ecx
171604B0 fld qword ptr [esp+14h]
171604B4 fstp dword ptr [esp+8]
171604B8 mov ecx,76E4F60h
171604BD mov dword ptr [esp+0Ch],ecx
171604C1 call eax
171604C3 add esp,1Ch
171604C6 ret
171604C7 mov eax,16237200h
171604CC mov dword ptr [eax],0
171604D2 mov eax,16237200h
171604D7 mov dword ptr [eax+4],0
171604DE mov eax,16237200h
171604E3 mov dword ptr [eax+8],0
171604EA mov eax,16237200h
171604EF mov dword ptr [eax+0Ch],0
171604F6 add esp,1Ch
171604F9 ret
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: temp.patch
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20050311/82268a2c/attachment.ksh>
More information about the llvm-dev
mailing list