[cfe-dev] Has something changed in variadic function call ABI...?

Fons Rademakers Fons.Rademakers at cern.ch
Tue Dec 14 07:20:31 PST 2010


Hoi Frits,

    this might work using C++; I'll look into it. I remember I had
problems with pointers to virtual functions, but it was quite a while ago
that I wrote this stuff, and since then it has been working with all
x86_64 compilers, including clang++, until a few days ago. So something
definitely changed in the clang ABI.

Cheers, Fons.



On 14/12/2010 15:19, Frits van Bommel wrote:
> On Tue, Dec 14, 2010 at 2:50 PM, Fons Rademakers
> <Fons.Rademakers at cern.ch>  wrote:
>>   the trunk version fails to execute code that used to work a few weeks
>> ago, code that constructs variadic function calls on x86_64. Here is an
>> example of our code (which is generated by a function-call stub generator
>> for our interpreter):
>>
>>    ...
>>    // put arguments in dval, or lval or u[]
>>    ...
>>    ...
>>    long fptr = (long)&Printf;   //<--- our variadic function
>>    __asm__ __volatile__("movlpd %0, %%xmm0"  :: "m" (dval[0]) : "%xmm0");
>>    __asm__ __volatile__("movlpd %0, %%xmm1"  :: "m" (dval[1]) : "%xmm1");
>>    __asm__ __volatile__("movlpd %0, %%xmm2"  :: "m" (dval[2]) : "%xmm2");
>>    __asm__ __volatile__("movlpd %0, %%xmm3"  :: "m" (dval[3]) : "%xmm3");
>>    __asm__ __volatile__("movlpd %0, %%xmm4"  :: "m" (dval[4]) : "%xmm4");
>>    __asm__ __volatile__("movlpd %0, %%xmm5"  :: "m" (dval[5]) : "%xmm5");
>>    __asm__ __volatile__("movlpd %0, %%xmm6"  :: "m" (dval[6]) : "%xmm6");
>>    __asm__ __volatile__("movlpd %0, %%xmm7"  :: "m" (dval[7]) : "%xmm7");
>>    __asm__ __volatile__("movq %0, %%rdi" :: "m" (lval[0]) : "%rdi");
>>    __asm__ __volatile__("movq %0, %%rsi" :: "m" (lval[1]) : "%rsi");
>>    __asm__ __volatile__("movq %0, %%rdx" :: "m" (lval[2]) : "%rdx");
>>    __asm__ __volatile__("movq %0, %%rcx" :: "m" (lval[3]) : "%rcx");
>>    __asm__ __volatile__("movq %0, %%r8"  :: "m" (lval[4]) : "%r8");
>>    __asm__ __volatile__("movq %0, %%r9"  :: "m" (lval[5]) : "%r9");
>>    __asm__ __volatile__("movq %0, %%r10" :: "m" (fptr) : "%r10");
>>    // (umax+2)*8 = 176
>>    __asm__ __volatile__("subq $176, %rsp");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 0(%%rsp)" :: "m" (u[0].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 8(%%rsp)" :: "m" (u[1].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 16(%%rsp)" :: "m" (u[2].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 24(%%rsp)" :: "m" (u[3].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 32(%%rsp)" :: "m" (u[4].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 40(%%rsp)" :: "m" (u[5].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 48(%%rsp)" :: "m" (u[6].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 56(%%rsp)" :: "m" (u[7].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 64(%%rsp)" :: "m" (u[8].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 72(%%rsp)" :: "m" (u[9].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 80(%%rsp)" :: "m" (u[10].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 88(%%rsp)" :: "m" (u[11].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 96(%%rsp)" :: "m" (u[12].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 104(%%rsp)" :: "m" (u[13].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 112(%%rsp)" :: "m" (u[14].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 120(%%rsp)" :: "m" (u[15].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 128(%%rsp)" :: "m" (u[16].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 136(%%rsp)" :: "m" (u[17].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 144(%%rsp)" :: "m" (u[18].lval) : "%rax");
>>    __asm__ __volatile__("movq %0, %%rax \n\t"
>>                         "movq %%rax, 152(%%rsp)" :: "m" (u[19].lval) : "%rax");
>>    __asm__ __volatile__("movl $8, %eax");  // number of used xmm registers
>>    __asm__ __volatile__("call *%r10");
>>    __asm__ __volatile__("addq $176, %rsp");
>>    __asm__ __volatile__("movq %%rax, %0" : "=m" (u[0].lval) :: "memory");  // get return value
>>    __asm__ __volatile__("movq %%rdi, %0" : "=m" (u[1].lval) :: "memory");  // get return value (icc C++ object)
>>        G__letint(result7, 67, (long) u[0].lval);
>>     return(1 || funcname || hash || result7 || libp) ;
>> }
>>
>> With the trunk version this call into Printf() fails, while it used to
>> work, and still works with gcc 4.1 - 4.5. Does anybody know whether
>> something in this area has changed on purpose, or whether this is a
>> regression?
>
> Why put everything in separate __asm__ statements? It might be less
> fragile if you'd put everything in a single one with appropriate
> constraints.
>
> For instance, even though your early __asm__ statements clobber the
> parameter registers (and %r10), the compiler is under no obligation to
> *preserve* their new values until your call statement. At the very
> least, try to replace those (and the one that sets %eax to 8) with
> proper constraints on the __asm__ containing the "call".
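>
> A rough sketch of what I mean, covering only the integer registers
> (purely an illustration, with made-up variable names; the xmm loads,
> the stack-argument area and the red zone below %rsp would still have
> to be handled inside the same statement):
>
>    register long rdi_ __asm__("rdi") = lval[0];
>    register long rsi_ __asm__("rsi") = lval[1];
>    register long rdx_ __asm__("rdx") = lval[2];
>    register long rcx_ __asm__("rcx") = lval[3];
>    register long r8_  __asm__("r8")  = lval[4];
>    register long r9_  __asm__("r9")  = lval[5];
>    register long r10_ __asm__("r10") = fptr;
>    register long rax_ __asm__("rax") = 8;  // upper bound on xmm regs used
>
>    __asm__ __volatile__("call *%%r10"
>        // "+r" tells the compiler these values must be live in their
>        // registers at the call and may be clobbered by it
>        : "+r" (rax_), "+r" (rdi_), "+r" (rsi_), "+r" (rdx_),
>          "+r" (rcx_), "+r" (r8_),  "+r" (r9_),  "+r" (r10_)
>        :
>        : "r11", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
>          "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13",
>          "xmm14", "xmm15", "memory", "cc");
>
>    u[0].lval = rax_;  // integer return value
>
> That way the compiler knows exactly which values have to be where at
> the call, instead of having to guess across statement boundaries.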
>
> I'd also say -fomit-frame-pointer might break your code, since you're
> manipulating %rsp manually. You may want to make sure to use
> -fno-omit-frame-pointer so that the "m" constraints hopefully won't use
> %rsp-relative addresses (assuming lval, dval and/or u[] are on the
> stack).
>
> If you want to know what exactly is going on, I'd suggest compiling
> with -S (or using a disassembler) to see whether any instructions get
> inserted between your __asm__ statements, and what kind of addresses
> the memory constraints generate.
>
>
> Alternatively, you could just replace all of that asm with something like
>
>    struct retval { int64_t a; int64_t b; };
>    retval ret = ((retval (*)(...))fptr)(dval[0], ..., dval[7],
>                                         lval[0], ..., lval[5],
>                                         u[0].lval, ..., u[19].lval);
>    u[0].lval = ret.a;
>    u[1].lval = ret.b;
>
> and let your compiler figure out the best way to do this instead of
> trying to re-implement the ABI manually...
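>
> Spelled out a bit more completely, purely as an illustration (the
> wrapper name, the argument counts and the assumption that dval holds
> doubles while lval and u[].lval hold longs are all guessed from your
> snippet), that could look something like:
>
>    #include <stdint.h>
>
>    struct retval { int64_t a; int64_t b; };
>
>    // Variadic function type with no named parameters (valid in C++).
>    // The compiler then builds the whole call according to the ABI:
>    // doubles go into xmm0-7, the first six integers into the integer
>    // registers, the rest onto the stack, and %al is set for you.
>    typedef retval (*generic_fn)(...);
>
>    static retval call_variadic(long fptr, const double *dval,
>                                const long *lval, const long *uval)
>    {
>        generic_fn f = (generic_fn) fptr;
>        return f(dval[0], dval[1], dval[2], dval[3],
>                 dval[4], dval[5], dval[6], dval[7],
>                 lval[0], lval[1], lval[2], lval[3],
>                 lval[4], lval[5],
>                 uval[0],  uval[1],  uval[2],  uval[3],  uval[4],
>                 uval[5],  uval[6],  uval[7],  uval[8],  uval[9],
>                 uval[10], uval[11], uval[12], uval[13], uval[14],
>                 uval[15], uval[16], uval[17], uval[18], uval[19]);
>    }
>
> with the generated stub then doing something like
>
>    long uvals[20];
>    for (int i = 0; i < 20; i++) uvals[i] = u[i].lval;
>    retval ret = call_variadic(fptr, dval, lval, uvals);
>    u[0].lval = ret.a;
>    u[1].lval = ret.b;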

-- 
Org:    CERN, European Laboratory for Particle Physics.
Mail:   1211 Geneve 23, Switzerland
E-Mail: Fons.Rademakers at cern.ch              Phone: +41 22 7679248
WWW:    http://fons.rademakers.org           Fax:   +41 22 7669640


