[LLVMdev] FP Intrinsics

Morten Ofstad morten at hue.no
Fri Mar 11 08:59:48 PST 2005


Update: I have been working on this all day, and I finally got it 
working, more or less, with the pattern instruction selector. However, 
the generated code is not very good, and I haven't yet implemented the 
expansion to calls for targets that do not support these FP instructions.

As an example, in the following function the sub, abs and compare 
compile to 13 instructions! Also, the result of the abs is stored as a 
64-bit double, while my intention was to get a 
32-bit float.
For comparison I included the code generated by X86ISelSimple, which 
is only 8 instructions. It seems the compare is being generated twice, 
in two different ways, with the pattern selector?!

I have also attached the diff of my current version.

m.

internal void %EvaluatePoint3D65() {
EntryBlock:
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
	%Value = call float %ReadVoxel( void* %_hVB59 )		; <float> [#uses=1]
	%arg2 = load float* %T64		; <float> [#uses=1]
	%VMCommandSubtract = sub float %Value, %arg2		; <float> [#uses=1]
	%VMCommandAbs = call float %llvm.abs( float %VMCommandSubtract )		; 
<float> [#uses=2]
	%isNonZero = setgt float %VMCommandAbs, 0.000000e+000		; <bool> [#uses=1]
	br bool %isNonZero, label %NonZero, label %Zero

NonZero:		; preds = %EntryBlock
	call void %Shader1DLookupLinear( <4 x float>* %_ARGB56, void* %_hS60, 
float %VMCommandAbs, void* %_hContext3D58 )
	ret void

Zero:		; preds = %EntryBlock
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
	store float 0.000000e+000, float* getelementptr ([4 x float]* cast (<4 
x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
	ret void
}

Generated with ISelPattern:

17160410  sub         esp,1Ch
17160413  mov         dword ptr ds:[161D6240h],0
1716041D  mov         dword ptr ds:[161D6244h],0
17160427  mov         dword ptr ds:[161D6248h],0
17160431  mov         dword ptr ds:[161D624Ch],0
1716043B  mov         eax,76E4560h
17160440  mov         dword ptr [esp],eax
17160443  call        HueVMReadCommands_LLVMReadVoxel (19BB229h)
17160448  fsub        dword ptr ds:[161D6280h]
1716044E  fabs
17160450  fst         qword ptr [esp+14h]
17160454  ftst
17160456  fstp        st(0)
17160458  fnstsw      ax
1716045A  sahf
1716045B  fldz
1716045D  fchs
1716045F  fld         qword ptr [esp+14h]
17160463  fucomip     st,st(1)
17160465  fstp        st(0)
17160467  jbe         17160498
1716046D  mov         eax,76E4F60h
17160472  mov         dword ptr [esp+0Ch],eax
17160476  fld         qword ptr [esp+14h]
1716047A  fstp        dword ptr [esp+8]
1716047E  mov         eax,15900060h
17160483  mov         dword ptr [esp+4],eax
17160487  mov         eax,161D6240h
1716048C  mov         dword ptr [esp],eax
1716048F  call        HueVMShaderCommands_LLVMShader1DLookupLinear 
(19D8B76h)
17160494  add         esp,1Ch
17160497  ret
17160498  mov         dword ptr ds:[161D6240h],0
171604A2  mov         dword ptr ds:[161D6244h],0
171604AC  mov         dword ptr ds:[161D6248h],0
171604B6  mov         dword ptr ds:[161D624Ch],0
171604C0  add         esp,1Ch
171604C3  ret

Generated with ISelSimple:

17160440  sub         esp,1Ch
17160443  mov         eax,16237200h
17160448  mov         dword ptr [eax],0
1716044E  mov         eax,16237200h
17160453  mov         dword ptr [eax+4],0
1716045A  mov         eax,16237200h
1716045F  mov         dword ptr [eax+8],0
17160466  mov         eax,16237200h
1716046B  mov         dword ptr [eax+0Ch],0
17160472  mov         eax,19BB229h
17160477  mov         ecx,76E4560h
1716047C  mov         dword ptr [esp],ecx
1716047F  call        eax
17160481  fsub        dword ptr ds:[16237240h]
17160487  fabs
17160489  fst         qword ptr [esp+14h]
1716048D  ftst
1716048F  fstp        st(0)
17160491  fnstsw      ax
17160493  sahf
17160494  jbe         171604C7
1716049A  mov         eax,19D8B76h
1716049F  mov         ecx,16237200h
171604A4  mov         dword ptr [esp],ecx
171604A7  mov         ecx,15900060h
171604AC  mov         dword ptr [esp+4],ecx
171604B0  fld         qword ptr [esp+14h]
171604B4  fstp        dword ptr [esp+8]
171604B8  mov         ecx,76E4F60h
171604BD  mov         dword ptr [esp+0Ch],ecx
171604C1  call        eax
171604C3  add         esp,1Ch
171604C6  ret
171604C7  mov         eax,16237200h
171604CC  mov         dword ptr [eax],0
171604D2  mov         eax,16237200h
171604D7  mov         dword ptr [eax+4],0
171604DE  mov         eax,16237200h
171604E3  mov         dword ptr [eax+8],0
171604EA  mov         eax,16237200h
171604EF  mov         dword ptr [eax+0Ch],0
171604F6  add         esp,1Ch
171604F9  ret
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: temp.patch
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20050311/82268a2c/attachment.ksh>


More information about the llvm-dev mailing list