[cfe-dev] Has something changed in variadic function call ABI...?
Frits van Bommel
fvbommel at gmail.com
Tue Dec 14 06:19:32 PST 2010
On Tue, Dec 14, 2010 at 2:50 PM, Fons Rademakers
<Fons.Rademakers at cern.ch> wrote:
> the trunk version fails in executing code that used to work a few weeks
> ago for code that constructs variadic function calls in x86_64. Here an
> example of our code (which is generated by a function call stub generator
> for our interpreter):
>
> ...
> // put arguments in dval, or lval or u[]
> ...
> ...
> long fptr = (long)&Printf; // <--- our variadic function
> __asm__ __volatile__("movlpd %0, %%xmm0" :: "m" (dval[0]) : "%xmm0");
> __asm__ __volatile__("movlpd %0, %%xmm1" :: "m" (dval[1]) : "%xmm1");
> __asm__ __volatile__("movlpd %0, %%xmm2" :: "m" (dval[2]) : "%xmm2");
> __asm__ __volatile__("movlpd %0, %%xmm3" :: "m" (dval[3]) : "%xmm3");
> __asm__ __volatile__("movlpd %0, %%xmm4" :: "m" (dval[4]) : "%xmm4");
> __asm__ __volatile__("movlpd %0, %%xmm5" :: "m" (dval[5]) : "%xmm5");
> __asm__ __volatile__("movlpd %0, %%xmm6" :: "m" (dval[6]) : "%xmm6");
> __asm__ __volatile__("movlpd %0, %%xmm7" :: "m" (dval[7]) : "%xmm7");
> __asm__ __volatile__("movq %0, %%rdi" :: "m" (lval[0]) : "%rdi");
> __asm__ __volatile__("movq %0, %%rsi" :: "m" (lval[1]) : "%rsi");
> __asm__ __volatile__("movq %0, %%rdx" :: "m" (lval[2]) : "%rdx");
> __asm__ __volatile__("movq %0, %%rcx" :: "m" (lval[3]) : "%rcx");
> __asm__ __volatile__("movq %0, %%r8" :: "m" (lval[4]) : "%r8");
> __asm__ __volatile__("movq %0, %%r9" :: "m" (lval[5]) : "%r9");
> __asm__ __volatile__("movq %0, %%r10" :: "m" (fptr) : "%r10");
> // (umax+2)*8 = 176
> __asm__ __volatile__("subq $176, %rsp");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 0(%%rsp)" :: "m" (u[0].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 8(%%rsp)" :: "m" (u[1].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 16(%%rsp)" :: "m" (u[2].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 24(%%rsp)" :: "m" (u[3].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 32(%%rsp)" :: "m" (u[4].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 40(%%rsp)" :: "m" (u[5].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 48(%%rsp)" :: "m" (u[6].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 56(%%rsp)" :: "m" (u[7].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 64(%%rsp)" :: "m" (u[8].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 72(%%rsp)" :: "m" (u[9].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 80(%%rsp)" :: "m" (u[10].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 88(%%rsp)" :: "m" (u[11].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 96(%%rsp)" :: "m" (u[12].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 104(%%rsp)" :: "m" (u[13].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 112(%%rsp)" :: "m" (u[14].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 120(%%rsp)" :: "m" (u[15].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 128(%%rsp)" :: "m" (u[16].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 136(%%rsp)" :: "m" (u[17].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 144(%%rsp)" :: "m" (u[18].lval) : "%rax");
> __asm__ __volatile__("movq %0, %%rax \n\t"
> "movq %%rax, 152(%%rsp)" :: "m" (u[19].lval) : "%rax");
> __asm__ __volatile__("movl $8, %eax"); // number of used xmm registers
> __asm__ __volatile__("call *%r10");
> __asm__ __volatile__("addq $176, %rsp");
> __asm__ __volatile__("movq %%rax, %0" : "=m" (u[0].lval) :: "memory");
> // get return value
> __asm__ __volatile__("movq %%rdi, %0" : "=m" (u[1].lval) :: "memory");
> // get return value (icc C++ object)
> G__letint(result7, 67, (long) u[0].lval);
> return(1 || funcname || hash || result7 || libp) ;
> }
>
> with the trunk version this call into Printf() fails. While it used to work
> and while it still works with gcc 4.1 - 4.5. Does anybody know if something
> in the area has changed on purpose or if this is a regression?
Why put everything in separate __asm__ statements? It might be less
fragile if you put everything in a single __asm__ statement with
appropriate constraints.
For instance, even though your early __asm__ statements clobber the
parameter registers (and %r10), the compiler is under no obligation to
*preserve* their new values until your call statement. At the very
least, try to replace those (and setting %eax to 8) with proper
constraints on the __asm__ containing the "call".
I'd also say -fomit-frame-pointer might break your code, since you're
manipulating %rsp manually. You might want to make sure to use
-fno-omit-frame-pointer so the "m" constraints hopefully won't use
%rsp-relative addresses (assuming lval, dval and/or u[] are on the
stack).
If you want to know what exactly is going on, I'd suggest compiling
with -S (or using a disassembler) to see whether any instructions get
inserted between your __asm__ statements, and what kind of addresses
the memory constraints generate.
Alternatively, you could just replace all of that asm with something like:
struct retval { int64_t a; int64_t b; };
retval ret = ((retval (*)(...))fptr)(dval[0], ..., dval[7], lval[0],
..., lval[5], u[0].lval, ... u[19].lval);
u[0].lval = ret.a;
u[1].lval = ret.b;
and let your compiler figure out the best way to do this instead of
trying to re-implement the ABI manually...
More information about the cfe-dev
mailing list