[LLVMdev] "equivalent" .ll files diverge after optimizations are applied

Tue Aug 31 13:29:29 PDT 2010

On Aug 31, 2010, at 1:21 PM PDT, Argyrios Kyrtzidis wrote:
>
> Just to be clear, are you saying that the use of MM registers in the
> asm that llc produces from the second IR indicates a bug?

Yes.  It's not immediately obvious whether the bug is in opt or llc,
though.
Chris was doing work involving <2 x float> and may know about this.

> -Argiris
>
>
> On Aug 31, 2010, at 7:51 PM, Dale Johannesen wrote:
>
>> Using MM registers is wrong unless the user has specifically asked  
>> for it, which doesn't seem to be the case here.
>> In the awesome MMX architecture, touching an MM register makes  
>> subsequent x87 operations fail unless an EMMS instruction is issued  
>> first; none of the compilers here are smart enough to insert EMMS  
>> instructions in the right places, so the only safe thing is not to  
>> use these registers.  There is no x87 instruction shown here, but  
>> you've probably got one in the full test suite and not in the test  
>> by itself, which fits your data.
>>
>> Why this is happening is not immediately clear.  It looks like the  
>> successful code is doing an aggregate copy field-by-field while the  
>> failing code has lowered this to a memcpy.   I would certainly  
>> expect the memcpy expansion to be smart enough to avoid using MM  
>> registers, though; that's a serious bug if it isn't.
>>
>> 	movd	%xmm0, %rax
>> 	movd	%rax, %mm0
>> 	movq2dq	%mm0, %xmm1
>> 	movq2dq	%mm0, %xmm2
>> 	punpcklqdq	%xmm2, %xmm1    ## xmm1 = xmm1[0],xmm2[0]
>> 	movq	16(%rsp), %rax
>> 	movd	%rax, %mm0
>> 	movq2dq	%mm0, %xmm0
>> 	punpcklqdq	%xmm2, %xmm0    ## xmm0 = xmm0[0],xmm2[0]
>>
>>
>> On Aug 31, 2010, at 11:18 AM PDT, Argyrios Kyrtzidis wrote:
>>
>>> Hi,
>>>
>>> I've attached 2 .ll files which are supposed to be equivalent, but
>>> 'unopt-fail.ll' causes a crash in webkit's test suite while
>>> 'unopt-pass.ll' does not. I can't give more details about the crash:
>>> when I run the crashing test in isolation it passes, but when I run
>>> the full suite it crashes; it boggles the mind.
>>>
>>> Below I provide the optimized asm that is produced from each file.
>>> Could you give me a hint as to what the problem is?
>>> I also attached 't.cpp' which approximates the source that the .ll  
>>> files came from.
>>>
>>> -Argiris
>>>
>>>
>> <unopt-fail.ll><unopt-pass.ll><t.cpp>
>>>
>>>
>>>
>>>
>>> $ opt -std-compile-opts unopt-pass.ll -o - | llc -o -
>>>
>>> 	.section	__TEXT,__text,regular,pure_instructions
>>> 	.globl	 
>>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
>>> 	.align	4, 0x90
>>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE 
>>> : ##  
>>> @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
>>> ## BB#0:
>>> 	subq	$24, %rsp
>>> 	movq	%rsi, %rdx
>>> 	movl	$0, 16(%rsp)
>>> 	movl	$0, 20(%rsp)
>>> 	movl	$0, 8(%rsp)
>>> 	movl	$0, 12(%rsp)
>>> 	movq	8(%rdi), %rsi
>>> 	leaq	16(%rsp), %rcx
>>> 	leaq	8(%rsp), %r8
>>> 	callq	 
>>> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
>>> 	movss	8(%rsp), %xmm1
>>> 	movss	12(%rsp), %xmm0
>>> 	subss	20(%rsp), %xmm0
>>> 	subss	16(%rsp), %xmm1
>>>                                      ## kill: XMM1<def> XMM1<kill>  
>>> XMM1<def>
>>> 	insertps	$16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
>>> 	movq	16(%rsp), %xmm0
>>> 	addq	$24, %rsp
>>> 	ret
>>>
>>>
>>> $ opt -std-compile-opts unopt-fail.ll -o - | llc -o -
>>>
>>> 	.section	__TEXT,__text,regular,pure_instructions
>>> 	.globl	 
>>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
>>> 	.align	4, 0x90
>>> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE 
>>> : ##  
>>> @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
>>> ## BB#0:
>>> 	subq	$24, %rsp
>>> 	movq	%rsi, %rdx
>>> 	movl	$0, 16(%rsp)
>>> 	movl	$0, 20(%rsp)
>>> 	movl	$0, 8(%rsp)
>>> 	movl	$0, 12(%rsp)
>>> 	movq	8(%rdi), %rsi
>>> 	leaq	16(%rsp), %rcx
>>> 	leaq	8(%rsp), %r8
>>> 	callq	 
>>> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
>>> 	movss	8(%rsp), %xmm0
>>> 	movss	12(%rsp), %xmm1
>>> 	subss	20(%rsp), %xmm1
>>> 	subss	16(%rsp), %xmm0
>>> 	insertps	$16, %xmm1, %xmm0 ## xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
>>> 	movd	%xmm0, %rax
>>> 	movd	%rax, %mm0
>>> 	movq2dq	%mm0, %xmm1
>>> 	movq2dq	%mm0, %xmm2
>>> 	punpcklqdq	%xmm2, %xmm1    ## xmm1 = xmm1[0],xmm2[0]
>>> 	movq	16(%rsp), %rax
>>> 	movd	%rax, %mm0
>>> 	movq2dq	%mm0, %xmm0
>>> 	punpcklqdq	%xmm2, %xmm0    ## xmm0 = xmm0[0],xmm2[0]
>>> 	addq	$24, %rsp
>>> 	ret
>>>
>>> _______________________________________________
>>> LLVM Developers mailing list
>>> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
>>> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
>>
>