[LLVMdev] Bug in X86CompilationCallback_SSE
Dan Gohman
gohman at apple.com
Thu Mar 12 09:18:18 PDT 2009
This looks like an interesting idea. As written, the inline asms
aren't safe
though; they reference %eax, %edx, etc. without declaring such things in
constraints, so the compiler wouldn't know that it can't clobber those
registers.
Dan
On Mar 11, 2009, at 2:39 PM, Corrado Zoccolo wrote:
> I don't know how to file a PR, but I have a patch (see below), that
> should work regardless of abi differences, since it relies on the
> compiler to do the tough job.
>
> void X86CompilationCallback_SSE(void) {
> char * SAVEBUF= (char*) alloca(64+12); // alloca is 16-byte aligned
>
> asm volatile (
> "movl %%eax,(%0)\n"
> "movl %%edx,4(%0)\n" // Save EAX/EDX/ECX
> "movl %%ecx,8(%0)\n"
> :: "r"(SAVEBUF+64): "memory" );
>
> asm volatile (
> // Save all XMM arg registers
> "movaps %%xmm0, (%0)\n"
> "movaps %%xmm1, 16(%0)\n"
> "movaps %%xmm2, 32(%0)\n"
> "movaps %%xmm3, 48(%0)\n"
> :: "r"(SAVEBUF) : "memory" );
>
> intptr_t *StackPtr=0, RetAddr=0;
>
> asm volatile ( // get stack ptr and retaddr
> "movl %%ebp,%0\n"
> "movl 4(%%ebp),%1\n"
> :"=r"(StackPtr), "=r"(RetAddr) :: "memory" );
>
> X86CompilationCallback2(StackPtr,RetAddr); // gcc knows how to
> call this according to the ABI
>
> asm volatile ( // restore XMM arg registers
> "movaps 48(%0), %%xmm3\n"
> "movaps 32(%0), %%xmm2\n"
> "movaps 16(%0), %%xmm1\n"
> "movaps (%0), %%xmm0\n"
> :: "r"(SAVEBUF) : "memory" );
>
> asm volatile (
> "movl (%0),%%eax\n"
> "movl 4(%0),%%edx\n" // Restore EAX/EDX/ECX
> "movl 8(%0),%%ecx\n"
> :: "r"(SAVEBUF+64): "memory" );
> }
>
> The generated code is as follows:
>
> Dump of assembler code for function X86CompilationCallback_SSE:
> 0xb74b98e0 <X86CompilationCallback_SSE+0>: push %ebp
> 0xb74b98e1 <X86CompilationCallback_SSE+1>: mov %esp,%ebp
> 0xb74b98e3 <X86CompilationCallback_SSE+3>: sub $0x78,%esp
> 0xb74b98e6 <X86CompilationCallback_SSE+6>: mov %esi,-0x8(%ebp)
> 0xb74b98e9 <X86CompilationCallback_SSE+9>: lea 0x17(%esp),%esi
> 0xb74b98ed <X86CompilationCallback_SSE+13>: and
> $0xfffffff0,%esi
> 0xb74b98f0 <X86CompilationCallback_SSE+16>: mov %ebx,-0xc(%ebp)
> 0xb74b98f3 <X86CompilationCallback_SSE+19>: mov %edi,-0x4(%ebp)
> 0xb74b98f6 <X86CompilationCallback_SSE+22>: lea 0x40(%esi),%edi
> 0xb74b98f9 <X86CompilationCallback_SSE+25>: call 0xb7315577
> <__i686.get_pc_thunk.bx>
> 0xb74b98fe <X86CompilationCallback_SSE+30>: add $0x76d71e,%ebx
> 0xb74b9904 <X86CompilationCallback_SSE+36>: mov %eax,(%edi)
> 0xb74b9906 <X86CompilationCallback_SSE+38>: mov %edx,0x4(%edi)
> 0xb74b9909 <X86CompilationCallback_SSE+41>: mov %ecx,0x8(%edi)
> 0xb74b990c <X86CompilationCallback_SSE+44>: movaps %xmm0,(%esi)
> 0xb74b990f <X86CompilationCallback_SSE+47>: movaps
> %xmm1,0x10(%esi)
> 0xb74b9913 <X86CompilationCallback_SSE+51>: movaps
> %xmm2,0x20(%esi)
> 0xb74b9917 <X86CompilationCallback_SSE+55>: movaps
> %xmm3,0x30(%esi)
> 0xb74b991b <X86CompilationCallback_SSE+59>: mov %ebp,%edx
> 0xb74b991d <X86CompilationCallback_SSE+61>: mov 0x4(%ebp),%eax
> 0xb74b9920 <X86CompilationCallback_SSE+64>: mov %eax,0x4(%esp)
> 0xb74b9924 <X86CompilationCallback_SSE+68>: mov %edx,(%esp)
> 0xb74b9927 <X86CompilationCallback_SSE+71>: call 0xb7303348
> <X86CompilationCallback2 at plt>
> 0xb74b992c <X86CompilationCallback_SSE+76>: movaps 0x30(%esi),
> %xmm3
> 0xb74b9930 <X86CompilationCallback_SSE+80>: movaps 0x20(%esi),
> %xmm2
> 0xb74b9934 <X86CompilationCallback_SSE+84>: movaps 0x10(%esi),
> %xmm1
> 0xb74b9938 <X86CompilationCallback_SSE+88>: movaps (%esi),%xmm0
> 0xb74b993b <X86CompilationCallback_SSE+91>: mov (%edi),%eax
> 0xb74b993d <X86CompilationCallback_SSE+93>: mov 0x4(%edi),%edx
> 0xb74b9940 <X86CompilationCallback_SSE+96>: mov 0x8(%edi),%ecx
> 0xb74b9943 <X86CompilationCallback_SSE+99>: mov -0xc(%ebp),%ebx
> 0xb74b9946 <X86CompilationCallback_SSE+102>: mov -0x8(%ebp),%esi
> 0xb74b9949 <X86CompilationCallback_SSE+105>: mov -0x4(%ebp),%edi
> 0xb74b994c <X86CompilationCallback_SSE+108>: mov %ebp,%esp
> 0xb74b994e <X86CompilationCallback_SSE+110>: pop %ebp
> 0xb74b994f <X86CompilationCallback_SSE+111>: ret
> End of assembler dump.
>
> And I verified that it works in my use case.
> Clearly the same should be done for other asm functions in that same
> file (e.g. the non-sse case).
>
> Corrado
> _______________________________________________
> LLVM Developers mailing list
> LLVMdev at cs.uiuc.edu http://llvm.cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
More information about the llvm-dev
mailing list