[LLVMdev] FP Intrinsics

Chris Lattner sabre at nondot.org
Wed Mar 16 08:40:56 PST 2005


On Fri, 11 Mar 2005, Morten Ofstad wrote:
> Update: I have been working on this all day, and I finally got it working 
> more or less with the pattern instruction selector... However, the generated 
> code is not very good, and I haven't implemented the expansion to calls if the 
> target does not support these FP instructions.

Oh, I see you fixed the F64 thing, sorry, disregard that part of the 
previous email :-/

> As an example, in the following function the sub, abs and compare compile to 
> 13 instructions! Also it has changed the result of the abs to be a 64 bit 
> double when it stores it, while my intention was to get a 32 bit float.
> For comparison, I included the code generated by the X86ISelSimple which is 
> only 8 instructions... it seems the compare is being generated twice in two 
> different ways with the pattern selector?!

I'm not sure if the specific problem you're talking about here is the 
comparison-against-zero issue, or something else.  Can you reduce it to a 
smaller test case or annotate the source with the problem you're hitting?

Thanks,

-Chris

> I also attached the diff of my current version
>
> m.
>
> internal void %EvaluatePoint3D65() {
> EntryBlock:
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
> 	%Value = call float %ReadVoxel( void* %_hVB59 )		; <float> 
> [#uses=1]
> 	%arg2 = load float* %T64		; <float> [#uses=1]
> 	%VMCommandSubtract = sub float %Value, %arg2		; <float> 
> [#uses=1]
> 	%VMCommandAbs = call float %llvm.abs( float %VMCommandSubtract ) 
> ; <float> [#uses=2]
> 	%isNonZero = setgt float %VMCommandAbs, 0.000000e+000		; 
> <bool> [#uses=1]
> 	br bool %isNonZero, label %NonZero, label %Zero
>
> NonZero:		; preds = %EntryBlock
> 	call void %Shader1DLookupLinear( <4 x float>* %_ARGB56, void* %_hS60, 
> float %VMCommandAbs, void* %_hContext3D58 )
> 	ret void
>
> Zero:		; preds = %EntryBlock
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 0)
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 1)
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 2)
> 	store float 0.000000e+000, float* getelementptr ([4 x float]* cast 
> (<4 x float>* %_ARGB56 to [4 x float]*), int 0, int 3)
> 	ret void
> }
>
> Generated with ISelPattern:
>
> 17160410  sub         esp,1Ch
> 17160413  mov         dword ptr ds:[161D6240h],0
> 1716041D  mov         dword ptr ds:[161D6244h],0
> 17160427  mov         dword ptr ds:[161D6248h],0
> 17160431  mov         dword ptr ds:[161D624Ch],0
> 1716043B  mov         eax,76E4560h
> 17160440  mov         dword ptr [esp],eax
> 17160443  call        HueVMReadCommands_LLVMReadVoxel (19BB229h)
> 17160448  fsub        dword ptr ds:[161D6280h]
> 1716044E  fabs
> 17160450  fst         qword ptr [esp+14h]
> 17160454  ftst
> 17160456  fstp        st(0)
> 17160458  fnstsw      ax
> 1716045A  sahf
> 1716045B  fldz
> 1716045D  fchs
> 1716045F  fld         qword ptr [esp+14h]
> 17160463  fucomip     st,st(1)
> 17160465  fstp        st(0)
> 17160467  jbe         17160498
> 1716046D  mov         eax,76E4F60h
> 17160472  mov         dword ptr [esp+0Ch],eax
> 17160476  fld         qword ptr [esp+14h]
> 1716047A  fstp        dword ptr [esp+8]
> 1716047E  mov         eax,15900060h
> 17160483  mov         dword ptr [esp+4],eax
> 17160487  mov         eax,161D6240h
> 1716048C  mov         dword ptr [esp],eax
> 1716048F  call        HueVMShaderCommands_LLVMShader1DLookupLinear (19D8B76h)
> 17160494  add         esp,1Ch
> 17160497  ret
> 17160498  mov         dword ptr ds:[161D6240h],0
> 171604A2  mov         dword ptr ds:[161D6244h],0
> 171604AC  mov         dword ptr ds:[161D6248h],0
> 171604B6  mov         dword ptr ds:[161D624Ch],0
> 171604C0  add         esp,1Ch
> 171604C3  ret
>
> Generated with ISelSimple:
>
> 17160440  sub         esp,1Ch
> 17160443  mov         eax,16237200h
> 17160448  mov         dword ptr [eax],0
> 1716044E  mov         eax,16237200h
> 17160453  mov         dword ptr [eax+4],0
> 1716045A  mov         eax,16237200h
> 1716045F  mov         dword ptr [eax+8],0
> 17160466  mov         eax,16237200h
> 1716046B  mov         dword ptr [eax+0Ch],0
> 17160472  mov         eax,19BB229h
> 17160477  mov         ecx,76E4560h
> 1716047C  mov         dword ptr [esp],ecx
> 1716047F  call        eax
> 17160481  fsub        dword ptr ds:[16237240h]
> 17160487  fabs
> 17160489  fst         qword ptr [esp+14h]
> 1716048D  ftst
> 1716048F  fstp        st(0)
> 17160491  fnstsw      ax
> 17160493  sahf
> 17160494  jbe         171604C7
> 1716049A  mov         eax,19D8B76h
> 1716049F  mov         ecx,16237200h
> 171604A4  mov         dword ptr [esp],ecx
> 171604A7  mov         ecx,15900060h
> 171604AC  mov         dword ptr [esp+4],ecx
> 171604B0  fld         qword ptr [esp+14h]
> 171604B4  fstp        dword ptr [esp+8]
> 171604B8  mov         ecx,76E4F60h
> 171604BD  mov         dword ptr [esp+0Ch],ecx
> 171604C1  call        eax
> 171604C3  add         esp,1Ch
> 171604C6  ret
> 171604C7  mov         eax,16237200h
> 171604CC  mov         dword ptr [eax],0
> 171604D2  mov         eax,16237200h
> 171604D7  mov         dword ptr [eax+4],0
> 171604DE  mov         eax,16237200h
> 171604E3  mov         dword ptr [eax+8],0
> 171604EA  mov         eax,16237200h
> 171604EF  mov         dword ptr [eax+0Ch],0
> 171604F6  add         esp,1Ch
> 171604F9  ret
>

-Chris

-- 
http://nondot.org/sabre/
http://llvm.cs.uiuc.edu/




More information about the llvm-dev mailing list