[LLVMdev] FP Intrinsics
Chris Lattner
sabre at nondot.org
Wed Mar 16 08:40:56 PST 2005
On Fri, 11 Mar 2005, Morten Ofstad wrote:
> Update: I have been working on this all day, and I finally got it working
> more or less with the pattern instruction selector... However, the generated
> code is not very good, and I haven't implemented the expand to calls if the
> target does not support these FP instructions.
Oh, I see you fixed the F64 thing, sorry, disregard that part of the
previous email :-/
> As an example, in the following function the sub abs and compare compiles to
> 13 instructions! Also it has changed the result of the abs to be a 64 bit
> double when it stores it, while my intention was to get a 32 bit float.
> For comparision I included the code generated by the X86ISelSimple which is
> only 8 instructions... it seems the compare is being generated twice in two
> different ways with the pattern selector?!
I'm not sure if the specific problem you're talking about here is the
comparison-against-zero issue, or something else. Can you reduce it to a
smaller test case or annotate the source with the problem you're hitting?
Thanks,
-Chris
> I also attached the diff of my current version
>
> m.
>
> internal void %EvaluatePoint3D65() {
> EntryBlock:
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
> %Value = call float %ReadVoxel( void* %_hVB59 ) ; <float>
> [#uses=1]
> %arg2 = load float* %T64 ; <float> [#uses=1]
> %VMCommandSubtract = sub float %Value, %arg2 ; <float>
> [#uses=1]
> %VMCommandAbs = call float %llvm.abs( float %VMCommandSubtract )
> ; <float> [#uses=2]
> %isNonZero = setgt float %VMCommandAbs, 0.000000e+000 ;
> <bool> [#uses=1]
> br bool %isNonZero, label %NonZero, label %Zero
>
> NonZero: ; preds = %EntryBlock
> call void %Shader1DLookupLinear( <4 x float>* %_ARGB56, void* %_hS60,
> float %VMCommandAbs, void* %_hContext3D58 )
> ret void
>
> Zero: ; preds = %EntryBlock
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
> store float 0.000000e+000, float* getelementptr ([4 x float]* cast
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
> ret void
> }
>
> Generated with ISelPattern:
>
> 17160410 sub esp,1Ch
> 17160413 mov dword ptr ds:[161D6240h],0
> 1716041D mov dword ptr ds:[161D6244h],0
> 17160427 mov dword ptr ds:[161D6248h],0
> 17160431 mov dword ptr ds:[161D624Ch],0
> 1716043B mov eax,76E4560h
> 17160440 mov dword ptr [esp],eax
> 17160443 call HueVMReadCommands_LLVMReadVoxel (19BB229h)
> 17160448 fsub dword ptr ds:[161D6280h]
> 1716044E fabs
> 17160450 fst qword ptr [esp+14h]
> 17160454 ftst
> 17160456 fstp st(0)
> 17160458 fnstsw ax
> 1716045A sahf
> 1716045B fldz
> 1716045D fchs
> 1716045F fld qword ptr [esp+14h]
> 17160463 fucomip st,st(1)
> 17160465 fstp st(0)
> 17160467 jbe 17160498
> 1716046D mov eax,76E4F60h
> 17160472 mov dword ptr [esp+0Ch],eax
> 17160476 fld qword ptr [esp+14h]
> 1716047A fstp dword ptr [esp+8]
> 1716047E mov eax,15900060h
> 17160483 mov dword ptr [esp+4],eax
> 17160487 mov eax,161D6240h
> 1716048C mov dword ptr [esp],eax
> 1716048F call HueVMShaderCommands_LLVMShader1DLookupLinear (19D8B76h)
> 17160494 add esp,1Ch
> 17160497 ret
> 17160498 mov dword ptr ds:[161D6240h],0
> 171604A2 mov dword ptr ds:[161D6244h],0
> 171604AC mov dword ptr ds:[161D6248h],0
> 171604B6 mov dword ptr ds:[161D624Ch],0
> 171604C0 add esp,1Ch
> 171604C3 ret
>
> Generated with ISelSimple:
>
> 17160440 sub esp,1Ch
> 17160443 mov eax,16237200h
> 17160448 mov dword ptr [eax],0
> 1716044E mov eax,16237200h
> 17160453 mov dword ptr [eax+4],0
> 1716045A mov eax,16237200h
> 1716045F mov dword ptr [eax+8],0
> 17160466 mov eax,16237200h
> 1716046B mov dword ptr [eax+0Ch],0
> 17160472 mov eax,19BB229h
> 17160477 mov ecx,76E4560h
> 1716047C mov dword ptr [esp],ecx
> 1716047F call eax
> 17160481 fsub dword ptr ds:[16237240h]
> 17160487 fabs
> 17160489 fst qword ptr [esp+14h]
> 1716048D ftst
> 1716048F fstp st(0)
> 17160491 fnstsw ax
> 17160493 sahf
> 17160494 jbe 171604C7
> 1716049A mov eax,19D8B76h
> 1716049F mov ecx,16237200h
> 171604A4 mov dword ptr [esp],ecx
> 171604A7 mov ecx,15900060h
> 171604AC mov dword ptr [esp+4],ecx
> 171604B0 fld qword ptr [esp+14h]
> 171604B4 fstp dword ptr [esp+8]
> 171604B8 mov ecx,76E4F60h
> 171604BD mov dword ptr [esp+0Ch],ecx
> 171604C1 call eax
> 171604C3 add esp,1Ch
> 171604C6 ret
> 171604C7 mov eax,16237200h
> 171604CC mov dword ptr [eax],0
> 171604D2 mov eax,16237200h
> 171604D7 mov dword ptr [eax+4],0
> 171604DE mov eax,16237200h
> 171604E3 mov dword ptr [eax+8],0
> 171604EA mov eax,16237200h
> 171604EF mov dword ptr [eax+0Ch],0
> 171604F6 add esp,1Ch
> 171604F9 ret
>
-Chris
--
http://nondot.org/sabre/
http://llvm.cs.uiuc.edu/
More information about the llvm-dev
mailing list