[LLVMdev] "equivalent" .ll files diverge after optimizations are applied

Dale Johannesen dalej at apple.com
Tue Aug 31 11:51:22 PDT 2010


Using MM registers is wrong unless the user has specifically asked for  
it, which doesn't seem to be the case here.
In the awesome MMX architecture, touching an MM register makes  
subsequent x87 operations fail unless an EMMS instruction is issued  
first; none of the compilers here are smart enough to insert EMMS  
instructions in the right places, so the only safe thing is not to use  
these registers.  There is no x87 instruction shown here, but you've  
probably got one in the full test suite and not in the test by itself,  
which fits your data.

Why this is happening is not immediately clear.  It looks like the  
successful code is doing an aggregate copy field-by-field while the  
failing code has lowered this to a memcpy.   I would certainly expect  
the memcpy expansion to be smart enough to avoid using MM registers,  
though; that's a serious bug if it isn't.

	movd	%xmm0, %rax
	movd	%rax, %mm0
	movq2dq	%mm0, %xmm1
	movq2dq	%mm0, %xmm2
	punpcklqdq	%xmm2, %xmm1    ## xmm1 = xmm1[0],xmm2[0]
	movq	16(%rsp), %rax
	movd	%rax, %mm0
	movq2dq	%mm0, %xmm0
	punpcklqdq	%xmm2, %xmm0    ## xmm0 = xmm0[0],xmm2[0]


On Aug 31, 2010, at 11:18 AM PDT, Argyrios Kyrtzidis wrote:

> Hi,
>
> I've attached 2 .ll files which are supposed to be equivalent, but
> 'unopt-fail.ll' causes a crash in WebKit's test suite while
> 'unopt-pass.ll' does not. I can't give more details about the crash:
> when I run the crashing test in isolation it passes, but when I run the
> full suite it crashes; it boggles the mind.
>
> Below I provide the optimized asm that is produced from each file.
> Could you give a hint on what the problem is?
> I also attached 't.cpp' which approximates the source that the .ll  
> files came from.
>
> -Argiris
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: unopt-fail.ll
Type: application/octet-stream
Size: 15313 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100831/b4aa9ded/attachment.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: unopt-pass.ll
Type: application/octet-stream
Size: 17832 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100831/b4aa9ded/attachment-0001.obj>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: t.cpp
Type: application/octet-stream
Size: 447 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20100831/b4aa9ded/attachment-0002.obj>
-------------- next part --------------
>
>
>
> $ opt -std-compile-opts unopt-pass.ll -o - | llc -o -
>
> 	.section	__TEXT,__text,regular,pure_instructions
> 	.globl	 
> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
> 	.align	4, 0x90
> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE:  
> ## @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
> ## BB#0:
> 	subq	$24, %rsp
> 	movq	%rsi, %rdx
> 	movl	$0, 16(%rsp)
> 	movl	$0, 20(%rsp)
> 	movl	$0, 8(%rsp)
> 	movl	$0, 12(%rsp)
> 	movq	8(%rdi), %rsi
> 	leaq	16(%rsp), %rcx
> 	leaq	8(%rsp), %r8
> 	callq	 
> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
> 	movss	8(%rsp), %xmm1
> 	movss	12(%rsp), %xmm0
> 	subss	20(%rsp), %xmm0
> 	subss	16(%rsp), %xmm1
>                                        ## kill: XMM1<def> XMM1<kill>  
> XMM1<def>
> 	insertps	$16, %xmm0, %xmm1 ## xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
> 	movq	16(%rsp), %xmm0
> 	addq	$24, %rsp
> 	ret
>
>
> $ opt -std-compile-opts unopt-fail.ll -o - | llc -o -
>
> 	.section	__TEXT,__text,regular,pure_instructions
> 	.globl	 
> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
> 	.align	4, 0x90
> __ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE:  
> ## @_ZN7WebCore15GraphicsContext19roundToDevicePixelsERKNS_9FloatRectE
> ## BB#0:
> 	subq	$24, %rsp
> 	movq	%rsi, %rdx
> 	movl	$0, 16(%rsp)
> 	movl	$0, 20(%rsp)
> 	movl	$0, 8(%rsp)
> 	movl	$0, 12(%rsp)
> 	movq	8(%rdi), %rsi
> 	leaq	16(%rsp), %rcx
> 	leaq	8(%rsp), %r8
> 	callq	 
> __ZN7WebCore5mouniEPNS_15GraphicsContextEPNS_30GraphicsContextPlatformPrivateERKNS_9FloatRectERNS_10FloatPointES8_
> 	movss	8(%rsp), %xmm0
> 	movss	12(%rsp), %xmm1
> 	subss	20(%rsp), %xmm1
> 	subss	16(%rsp), %xmm0
> 	insertps	$16, %xmm1, %xmm0 ## xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
> 	movd	%xmm0, %rax
> 	movd	%rax, %mm0
> 	movq2dq	%mm0, %xmm1
> 	movq2dq	%mm0, %xmm2
> 	punpcklqdq	%xmm2, %xmm1    ## xmm1 = xmm1[0],xmm2[0]
> 	movq	16(%rsp), %rax
> 	movd	%rax, %mm0
> 	movq2dq	%mm0, %xmm0
> 	punpcklqdq	%xmm2, %xmm0    ## xmm0 = xmm0[0],xmm2[0]
> 	addq	$24, %rsp
> 	ret
>
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev



More information about the llvm-dev mailing list