[LLVMdev] Bug in X86CompilationCallback_SSE

Corrado Zoccolo czoccolo at gmail.com
Wed Mar 11 14:39:44 PDT 2009


I don't know how to file a PR, but I have a patch (see below) that
should work regardless of ABI differences, since it relies on the
compiler to do the tough job.

/*
 * Compilation-callback entry point for the SSE case.
 *
 * Saves EAX/EDX/ECX and XMM0-XMM3 into a stack buffer, reads the frame
 * pointer and the word at 4(%ebp), passes both to X86CompilationCallback2,
 * and then restores the saved registers before returning.  The call itself
 * is plain C, so the compiler emits whatever calling sequence the ABI needs.
 *
 * NOTE(review): this relies on the compiler (a) building an EBP-based frame,
 * so that 4(%ebp) is this function's return address, and (b) not using
 * EAX/EDX/ECX or XMM0-XMM3 in the code it emits before the first asm
 * statement.  Both hold for the GCC output quoted in this message; confirm
 * for other compilers and optimization levels.
 */
void X86CompilationCallback_SSE(void) {
    // Layout: bytes 0..63 hold XMM0-XMM3, bytes 64..75 hold EAX/EDX/ECX.
    // movaps requires a 16-byte-aligned address, so this depends on alloca
    // returning 16-byte-aligned storage (assumed by the original author;
    // TODO confirm for every supported compiler).
    char *SAVEBUF = (char *) alloca(64 + 12);

    // Save EAX/EDX/ECX.  They are listed as clobbers so the compiler cannot
    // pick one of them to hold the base pointer %0; computing the pointer
    // into, say, EAX would destroy the value we are trying to save.
    asm volatile (
    "movl %%eax,(%0)\n"
    "movl %%edx,4(%0)\n"
    "movl %%ecx,8(%0)\n"
    :: "r"(SAVEBUF + 64) : "memory", "eax", "edx", "ecx" );

    // Save all XMM arg registers.
    asm volatile (
    "movaps  %%xmm0, (%0)\n"
    "movaps  %%xmm1, 16(%0)\n"
    "movaps  %%xmm2, 32(%0)\n"
    "movaps  %%xmm3, 48(%0)\n"
    :: "r"(SAVEBUF) : "memory" );

    intptr_t *StackPtr = 0;
    intptr_t RetAddr = 0;

    // Get the stack (frame) pointer and the return address stored above it.
    asm volatile (
    "movl %%ebp,%0\n"
    "movl 4(%%ebp),%1\n"
    : "=r"(StackPtr), "=r"(RetAddr) :: "memory" );

    // The compiler knows how to call this according to the ABI.
    X86CompilationCallback2(StackPtr, RetAddr);

    // Restore XMM arg registers.
    asm volatile (
    "movaps  48(%0), %%xmm3\n"
    "movaps  32(%0), %%xmm2\n"
    "movaps  16(%0), %%xmm1\n"
    "movaps  (%0), %%xmm0\n"
    :: "r"(SAVEBUF) : "memory" );

    // Restore EAX/EDX/ECX.  The clobber list keeps the base pointer %0 out
    // of these registers; otherwise the first load could overwrite the base
    // before the remaining loads use it.
    asm volatile (
    "movl (%0),%%eax\n"
    "movl 4(%0),%%edx\n"
    "movl 8(%0),%%ecx\n"
    :: "r"(SAVEBUF + 64) : "memory", "eax", "edx", "ecx" );
}

The generated code is as follows:

Dump of assembler code for function X86CompilationCallback_SSE:
0xb74b98e0 <X86CompilationCallback_SSE+0>:      push   %ebp
0xb74b98e1 <X86CompilationCallback_SSE+1>:      mov    %esp,%ebp
0xb74b98e3 <X86CompilationCallback_SSE+3>:      sub    $0x78,%esp
0xb74b98e6 <X86CompilationCallback_SSE+6>:      mov    %esi,-0x8(%ebp)
0xb74b98e9 <X86CompilationCallback_SSE+9>:      lea    0x17(%esp),%esi
0xb74b98ed <X86CompilationCallback_SSE+13>:     and    $0xfffffff0,%esi
0xb74b98f0 <X86CompilationCallback_SSE+16>:     mov    %ebx,-0xc(%ebp)
0xb74b98f3 <X86CompilationCallback_SSE+19>:     mov    %edi,-0x4(%ebp)
0xb74b98f6 <X86CompilationCallback_SSE+22>:     lea    0x40(%esi),%edi
0xb74b98f9 <X86CompilationCallback_SSE+25>:     call   0xb7315577 <__i686.get_pc_thunk.bx>
0xb74b98fe <X86CompilationCallback_SSE+30>:     add    $0x76d71e,%ebx
0xb74b9904 <X86CompilationCallback_SSE+36>:     mov    %eax,(%edi)
0xb74b9906 <X86CompilationCallback_SSE+38>:     mov    %edx,0x4(%edi)
0xb74b9909 <X86CompilationCallback_SSE+41>:     mov    %ecx,0x8(%edi)
0xb74b990c <X86CompilationCallback_SSE+44>:     movaps %xmm0,(%esi)
0xb74b990f <X86CompilationCallback_SSE+47>:     movaps %xmm1,0x10(%esi)
0xb74b9913 <X86CompilationCallback_SSE+51>:     movaps %xmm2,0x20(%esi)
0xb74b9917 <X86CompilationCallback_SSE+55>:     movaps %xmm3,0x30(%esi)
0xb74b991b <X86CompilationCallback_SSE+59>:     mov    %ebp,%edx
0xb74b991d <X86CompilationCallback_SSE+61>:     mov    0x4(%ebp),%eax
0xb74b9920 <X86CompilationCallback_SSE+64>:     mov    %eax,0x4(%esp)
0xb74b9924 <X86CompilationCallback_SSE+68>:     mov    %edx,(%esp)
0xb74b9927 <X86CompilationCallback_SSE+71>:     call   0xb7303348 <X86CompilationCallback2@plt>
0xb74b992c <X86CompilationCallback_SSE+76>:     movaps 0x30(%esi),%xmm3
0xb74b9930 <X86CompilationCallback_SSE+80>:     movaps 0x20(%esi),%xmm2
0xb74b9934 <X86CompilationCallback_SSE+84>:     movaps 0x10(%esi),%xmm1
0xb74b9938 <X86CompilationCallback_SSE+88>:     movaps (%esi),%xmm0
0xb74b993b <X86CompilationCallback_SSE+91>:     mov    (%edi),%eax
0xb74b993d <X86CompilationCallback_SSE+93>:     mov    0x4(%edi),%edx
0xb74b9940 <X86CompilationCallback_SSE+96>:     mov    0x8(%edi),%ecx
0xb74b9943 <X86CompilationCallback_SSE+99>:     mov    -0xc(%ebp),%ebx
0xb74b9946 <X86CompilationCallback_SSE+102>:    mov    -0x8(%ebp),%esi
0xb74b9949 <X86CompilationCallback_SSE+105>:    mov    -0x4(%ebp),%edi
0xb74b994c <X86CompilationCallback_SSE+108>:    mov    %ebp,%esp
0xb74b994e <X86CompilationCallback_SSE+110>:    pop    %ebp
0xb74b994f <X86CompilationCallback_SSE+111>:    ret
End of assembler dump.

And I verified that it works in my use case.
Clearly the same should be done for other asm functions in that same
file (e.g. the non-sse case).

Corrado



More information about the llvm-dev mailing list