[cfe-dev] Has something changed in variadic function call ABI...?

Frits van Bommel fvbommel at gmail.com
Tue Dec 14 06:19:32 PST 2010


On Tue, Dec 14, 2010 at 2:50 PM, Fons Rademakers
<Fons.Rademakers at cern.ch> wrote:
>  the trunk version fails to execute code that worked a few weeks ago:
> code that constructs variadic function calls on x86_64. Here is an
> example of our code (which is generated by a function-call stub
> generator for our interpreter):
>
>   ...
>   // put arguments in dval, or lval or u[]
>   ...
>   ...
>   long fptr = (long)&Printf;   // <--- our variadic function
>   __asm__ __volatile__("movlpd %0, %%xmm0"  :: "m" (dval[0]) : "%xmm0");
>   __asm__ __volatile__("movlpd %0, %%xmm1"  :: "m" (dval[1]) : "%xmm1");
>   __asm__ __volatile__("movlpd %0, %%xmm2"  :: "m" (dval[2]) : "%xmm2");
>   __asm__ __volatile__("movlpd %0, %%xmm3"  :: "m" (dval[3]) : "%xmm3");
>   __asm__ __volatile__("movlpd %0, %%xmm4"  :: "m" (dval[4]) : "%xmm4");
>   __asm__ __volatile__("movlpd %0, %%xmm5"  :: "m" (dval[5]) : "%xmm5");
>   __asm__ __volatile__("movlpd %0, %%xmm6"  :: "m" (dval[6]) : "%xmm6");
>   __asm__ __volatile__("movlpd %0, %%xmm7"  :: "m" (dval[7]) : "%xmm7");
>   __asm__ __volatile__("movq %0, %%rdi" :: "m" (lval[0]) : "%rdi");
>   __asm__ __volatile__("movq %0, %%rsi" :: "m" (lval[1]) : "%rsi");
>   __asm__ __volatile__("movq %0, %%rdx" :: "m" (lval[2]) : "%rdx");
>   __asm__ __volatile__("movq %0, %%rcx" :: "m" (lval[3]) : "%rcx");
>   __asm__ __volatile__("movq %0, %%r8"  :: "m" (lval[4]) : "%r8");
>   __asm__ __volatile__("movq %0, %%r9"  :: "m" (lval[5]) : "%r9");
>   __asm__ __volatile__("movq %0, %%r10" :: "m" (fptr) : "%r10");
>   // (umax+2)*8 = 176
>   __asm__ __volatile__("subq $176, %rsp");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 0(%%rsp)" :: "m" (u[0].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 8(%%rsp)" :: "m" (u[1].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 16(%%rsp)" :: "m" (u[2].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 24(%%rsp)" :: "m" (u[3].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 32(%%rsp)" :: "m" (u[4].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 40(%%rsp)" :: "m" (u[5].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 48(%%rsp)" :: "m" (u[6].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 56(%%rsp)" :: "m" (u[7].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 64(%%rsp)" :: "m" (u[8].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 72(%%rsp)" :: "m" (u[9].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 80(%%rsp)" :: "m" (u[10].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 88(%%rsp)" :: "m" (u[11].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 96(%%rsp)" :: "m" (u[12].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 104(%%rsp)" :: "m" (u[13].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 112(%%rsp)" :: "m" (u[14].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 120(%%rsp)" :: "m" (u[15].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 128(%%rsp)" :: "m" (u[16].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 136(%%rsp)" :: "m" (u[17].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 144(%%rsp)" :: "m" (u[18].lval) : "%rax");
>   __asm__ __volatile__("movq %0, %%rax \n\t"
>                        "movq %%rax, 152(%%rsp)" :: "m" (u[19].lval) : "%rax");
>   __asm__ __volatile__("movl $8, %eax");  // number of used xmm registers
>   __asm__ __volatile__("call *%r10");
>   __asm__ __volatile__("addq $176, %rsp");
>   __asm__ __volatile__("movq %%rax, %0" : "=m" (u[0].lval) :: "memory");
> // get return value
>   __asm__ __volatile__("movq %%rdi, %0" : "=m" (u[1].lval) :: "memory");
> // get return value (icc C++ object)
>       G__letint(result7, 67, (long) u[0].lval);
>    return(1 || funcname || hash || result7 || libp) ;
> }
>
> With the trunk version this call into Printf() fails, while it used to
> work and still works with gcc 4.1 - 4.5. Does anybody know whether
> something in this area has changed on purpose, or whether this is a
> regression?

Why put everything in separate __asm__ statements? It might be less
fragile if you'd put everything in a single one with appropriate
constraints.

For instance, even though your early __asm__ statements clobber the
parameter registers (and %r10), the compiler is under no obligation to
*preserve* their new values until your call statement. At the very
least, try to replace those (and setting %eax to 8) with proper
constraints on the __asm__ containing the "call".
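
Roughly something like this (a completely untested sketch that only
covers the register arguments; dval, lval, fptr and u are your
variables, everything else is made up):

  register double x0 __asm__("xmm0") = dval[0];
  register double x1 __asm__("xmm1") = dval[1];
  /* ... and so on up to xmm7 ... */
  register long a0 __asm__("rdi") = lval[0];
  register long a1 __asm__("rsi") = lval[1];
  register long a2 __asm__("rdx") = lval[2];
  register long a3 __asm__("rcx") = lval[3];
  register long a4 __asm__("r8")  = lval[4];
  register long a5 __asm__("r9")  = lval[5];
  register long fn __asm__("r10") = fptr;
  long rax_val = 8;   // %al = number of xmm registers used
  __asm__ __volatile__("call *%%r10"
                       : "+a" (rax_val)
                       : "x" (x0), "x" (x1), /* ..., */
                         "r" (a0), "r" (a1), "r" (a2), "r" (a3),
                         "r" (a4), "r" (a5), "r" (fn)
                       : "memory", "cc", "r11");
  u[0].lval = rax_val;   // integer return value from %rax

A real version would also have to pass the stack arguments and list the
remaining call-clobbered registers (%r11 above is just the obvious one),
but the point is that every register the call depends on is an operand
of the __asm__ that contains the call, so the compiler can't reuse any
of them in between.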

I'd also say -fomit-frame-pointer might break your code, since you're
manipulating %rsp manually. You might want to make sure to use
-fno-omit-frame-pointer so that the "m" constraints hopefully won't use
%rsp-relative addresses (assuming lval, dval and/or u[] are on the
stack).

If you want to know what exactly is going on, I'd suggest compiling
with -S (or using a disassembler) to see whether any instructions get
inserted between your __asm__ statements, and what kind of addresses
the memory constraints generate.
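
For example (the command line is purely illustrative; adjust the file
names and flags to your build):

  clang++ -S -O2 -fno-omit-frame-pointer stub.cxx -o stub.s

and then look at the generated movlpd/movq sequence in stub.s: ideally
the "m" operands resolve to %rbp-relative (or absolute) addresses rather
than %rsp-relative ones, and nothing gets scheduled between your __asm__
blocks or between the "subq" and the "call".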


Alternatively, you could just replace all of that asm with something like
  struct retval { int64_t a; int64_t b; };
  retval ret = ((retval (*)(...))fptr)(dval[0], ..., dval[7],
                                       lval[0], ..., lval[5],
                                       u[0].lval, ..., u[19].lval);
  u[0].lval = ret.a;
  u[1].lval = ret.b;
and let your compiler figure out the best way to do this instead of
trying to re-implement the ABI manually...
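
Spelled out for just a couple of arguments (purely illustrative; the
exact return struct and the cast from a long depend on what your stub
generator knows about the callee):

  #include <stdint.h>

  struct retval { int64_t a; int64_t b; };
  typedef retval (*stub_fn)(...);   // C++ allows an "ellipsis only" prototype

  retval ret = ((stub_fn) fptr)(dval[0], dval[1],
                                lval[0], lval[1],
                                u[0].lval, u[1].lval);
  u[0].lval = ret.a;
  u[1].lval = ret.b;

The compiler then takes care of the register assignment, the stack
area, %al and the return value for you, and the code keeps working if
any of those details ever change.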



