[LLVMdev] Bug in X86CompilationCallback_SSE

Dan Gohman gohman at apple.com
Thu Mar 12 09:18:18 PDT 2009


This looks like an interesting idea. As written, though, the inline asms
aren't safe; they reference %eax, %edx, etc. without declaring them in the
constraints or clobber lists, so the compiler wouldn't know that it can't
clobber those registers.
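
For example, a minimal sketch of what declaring those effects could look
like for the XMM restore block quoted below, assuming GCC-style extended
asm (illustration only, not a tested fix):

    /* Hypothetical rework: the asm writes %xmm0-%xmm3, so list them
       as clobbers; otherwise the compiler may assume any values it
       kept in those registers survive the statement. */
    asm volatile (
        "movaps 48(%0), %%xmm3\n"
        "movaps 32(%0), %%xmm2\n"
        "movaps 16(%0), %%xmm1\n"
        "movaps  (%0), %%xmm0\n"
        : /* no outputs */
        : "r"(SAVEBUF)
        : "memory", "xmm0", "xmm1", "xmm2", "xmm3");

Of course, declaring %xmm0-%xmm3 clobbered tells the compiler their old
contents are gone, while the whole point of the stub is that they are live
again at the ret; nothing stops the compiler from touching them after the
asm, which is part of why such trampolines are usually written in pure
assembly.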

Dan

On Mar 11, 2009, at 2:39 PM, Corrado Zoccolo wrote:

> I don't know how to file a PR, but I have a patch (see below) that
> should work regardless of ABI differences, since it relies on the
> compiler to do the tough job.
>
> void X86CompilationCallback_SSE(void) {
>    char * SAVEBUF= (char*) alloca(64+12); // alloca is 16-byte aligned
>
>    asm volatile (
>    "movl %%eax,(%0)\n"
>    "movl %%edx,4(%0)\n"          // Save EAX/EDX/ECX
>    "movl %%ecx,8(%0)\n"
>    :: "r"(SAVEBUF+64): "memory" );
>
>    asm volatile (
>    // Save all XMM arg registers
>    "movaps  %%xmm0, (%0)\n"
>    "movaps  %%xmm1, 16(%0)\n"
>    "movaps  %%xmm2, 32(%0)\n"
>    "movaps  %%xmm3, 48(%0)\n"
>    :: "r"(SAVEBUF) : "memory" );
>
>    intptr_t *StackPtr=0, RetAddr=0;
>
>    asm volatile (     // get stack ptr and retaddr
>    "movl %%ebp,%0\n"
>    "movl 4(%%ebp),%1\n"
>    :"=r"(StackPtr), "=r"(RetAddr) :: "memory" );
>
>    X86CompilationCallback2(StackPtr,RetAddr); // gcc knows how to call this according to the ABI
>
>    asm volatile ( // restore XMM arg registers
>    "movaps  48(%0), %%xmm3\n"
>    "movaps  32(%0), %%xmm2\n"
>    "movaps  16(%0), %%xmm1\n"
>    "movaps  (%0), %%xmm0\n"
>    :: "r"(SAVEBUF) : "memory" );
>
>    asm volatile (
>    "movl (%0),%%eax\n"
>    "movl 4(%0),%%edx\n"          // Restore EAX/EDX/ECX
>    "movl 8(%0),%%ecx\n"
>    :: "r"(SAVEBUF+64): "memory" );
> }
>
> The generated code is as follows:
>
> Dump of assembler code for function X86CompilationCallback_SSE:
> 0xb74b98e0 <X86CompilationCallback_SSE+0>:      push   %ebp
> 0xb74b98e1 <X86CompilationCallback_SSE+1>:      mov    %esp,%ebp
> 0xb74b98e3 <X86CompilationCallback_SSE+3>:      sub    $0x78,%esp
> 0xb74b98e6 <X86CompilationCallback_SSE+6>:      mov    %esi,-0x8(%ebp)
> 0xb74b98e9 <X86CompilationCallback_SSE+9>:      lea    0x17(%esp),%esi
> 0xb74b98ed <X86CompilationCallback_SSE+13>:     and    $0xfffffff0,%esi
> 0xb74b98f0 <X86CompilationCallback_SSE+16>:     mov    %ebx,-0xc(%ebp)
> 0xb74b98f3 <X86CompilationCallback_SSE+19>:     mov    %edi,-0x4(%ebp)
> 0xb74b98f6 <X86CompilationCallback_SSE+22>:     lea    0x40(%esi),%edi
> 0xb74b98f9 <X86CompilationCallback_SSE+25>:     call   0xb7315577 <__i686.get_pc_thunk.bx>
> 0xb74b98fe <X86CompilationCallback_SSE+30>:     add    $0x76d71e,%ebx
> 0xb74b9904 <X86CompilationCallback_SSE+36>:     mov    %eax,(%edi)
> 0xb74b9906 <X86CompilationCallback_SSE+38>:     mov    %edx,0x4(%edi)
> 0xb74b9909 <X86CompilationCallback_SSE+41>:     mov    %ecx,0x8(%edi)
> 0xb74b990c <X86CompilationCallback_SSE+44>:     movaps %xmm0,(%esi)
> 0xb74b990f <X86CompilationCallback_SSE+47>:     movaps %xmm1,0x10(%esi)
> 0xb74b9913 <X86CompilationCallback_SSE+51>:     movaps %xmm2,0x20(%esi)
> 0xb74b9917 <X86CompilationCallback_SSE+55>:     movaps %xmm3,0x30(%esi)
> 0xb74b991b <X86CompilationCallback_SSE+59>:     mov    %ebp,%edx
> 0xb74b991d <X86CompilationCallback_SSE+61>:     mov    0x4(%ebp),%eax
> 0xb74b9920 <X86CompilationCallback_SSE+64>:     mov    %eax,0x4(%esp)
> 0xb74b9924 <X86CompilationCallback_SSE+68>:     mov    %edx,(%esp)
> 0xb74b9927 <X86CompilationCallback_SSE+71>:     call   0xb7303348 <X86CompilationCallback2@plt>
> 0xb74b992c <X86CompilationCallback_SSE+76>:     movaps 0x30(%esi),%xmm3
> 0xb74b9930 <X86CompilationCallback_SSE+80>:     movaps 0x20(%esi),%xmm2
> 0xb74b9934 <X86CompilationCallback_SSE+84>:     movaps 0x10(%esi),%xmm1
> 0xb74b9938 <X86CompilationCallback_SSE+88>:     movaps (%esi),%xmm0
> 0xb74b993b <X86CompilationCallback_SSE+91>:     mov    (%edi),%eax
> 0xb74b993d <X86CompilationCallback_SSE+93>:     mov    0x4(%edi),%edx
> 0xb74b9940 <X86CompilationCallback_SSE+96>:     mov    0x8(%edi),%ecx
> 0xb74b9943 <X86CompilationCallback_SSE+99>:     mov    -0xc(%ebp),%ebx
> 0xb74b9946 <X86CompilationCallback_SSE+102>:    mov    -0x8(%ebp),%esi
> 0xb74b9949 <X86CompilationCallback_SSE+105>:    mov    -0x4(%ebp),%edi
> 0xb74b994c <X86CompilationCallback_SSE+108>:    mov    %ebp,%esp
> 0xb74b994e <X86CompilationCallback_SSE+110>:    pop    %ebp
> 0xb74b994f <X86CompilationCallback_SSE+111>:    ret
> End of assembler dump.
>
> And I verified that it works in my use case.
> Clearly the same should be done for other asm functions in that same
> file (e.g. the non-SSE case).
>
> Corrado
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
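
Taking Dan's point one step further, the %eax/%edx/%ecx handling in the
patch above could avoid raw register references entirely. A hedged sketch,
assuming GCC-style extended asm, where "=a"/"=d"/"=c" pin an operand to
%eax/%edx/%ecx (hypothetical, not the committed fix):

    /* Capture the live-in argument registers into C variables, so the
       compiler knows they hold meaningful values on entry and must not
       be reused before this point. */
    intptr_t SavedEAX, SavedEDX, SavedECX;
    asm volatile ("" : "=a"(SavedEAX), "=d"(SavedEDX), "=c"(SavedECX));

    /* ... call X86CompilationCallback2 as in the patch ... */

    /* Request the saved values back in the same registers before
       returning.  This still doesn't forbid the compiler from touching
       them between here and the ret, so the approach stays fragile
       compared to a hand-written assembly stub. */
    asm volatile ("" : : "a"(SavedEAX), "d"(SavedEDX), "c"(SavedECX));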
