[LLVMdev] Tail Call Optimisation
Jeffrey Yasskin
jyasskin at google.com
Mon Jan 4 10:06:37 PST 2010
On Mon, Jan 4, 2010 at 5:03 AM, Jon Harrop <jon at ffconsultancy.com> wrote:
> On Monday 04 January 2010 05:16:40 Jeffrey Yasskin wrote:
>> On Sun, Jan 3, 2010 at 10:50 PM, Jon Harrop <jon at ffconsultancy.com> wrote:
>> > LLVM's TCO already handles mutual recursion.
>>
>> Only for fastcc functions
>
> Yes.
>
>> compiled with -tailcallopt, right?
>
> If you use the compiler, yes.
>
>> http://llvm.org/docs/CodeGenerator.html#tailcallopt
>>
>> I believe gcc manages to support tail calls in many more cases, and
>> this restriction in llvm confuses lots of newcomers. It would be very
>> worthwhile if someone wanted to remove it.
>
> That's interesting. What tail calls can be supported without changing the
> calling convention
Simon's original example, for one. See below for the C and assembly at
gcc -O3. Not all tail calls can be supported, and I can't find a
recent authoritative list of the exact restrictions, but
http://www.complang.tuwien.ac.at/schani/diplarb.ps has a list from
2001.
> and would it not simply be easier to switch to the fastcc
> convention between internal functions to achieve the same effect from outside
> LLVM?
Possibly, but not all functions can be internal.
> Conversely, if TCO is implemented for the cc convention, what will be
> the point of fastcc?
I don't think we should be omitting optimizations because they might
make a calling convention obsolete. (Although in this case, there are
still some extra calls a different calling convention can make tail
calls, and fastcc still improves the x86-32 call sequence.)
$ cat test.c
#include <stdio.h>
char* a(int n) __attribute__((noinline));
char* b(int n) __attribute__((noinline));
char* a(int n) {
if (n <= 0) {
return "DONE";
} else {
return b(n - 1);
}
}
char* b(int n) {
if (n <= 0) {
return "DONE";
} else {
return a(n - 1);
}
}
int main() {
puts(a(10000000));
return 0;
}
$ gcc -v
Using built-in specs.
Target: i386-apple-darwin9
Configured with: ../gcc-4.4.1/configure --prefix=/opt/local
--build=i386-apple-darwin9
--enable-languages=c,c++,objc,obj-c++,java,fortran
--libdir=/opt/local/lib/gcc44 --includedir=/opt/local/include/gcc44
--infodir=/opt/local/share/info --mandir=/opt/local/share/man
--with-local-prefix=/opt/local --with-system-zlib --disable-nls
--program-suffix=-mp-4.4
--with-gxx-include-dir=/opt/local/include/gcc44/c++/
--with-gmp=/opt/local --with-mpfr=/opt/local
Thread model: posix
gcc version 4.4.1 (GCC)
$ gcc -Wall -O3 test.c -o test.s -S
$ cat test.s
.cstring
LC0:
.ascii "DONE\0"
.text
.align 4,0x90
.globl _b
_b:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl 8(%ebp), %eax
call ___i686.get_pc_thunk.cx
"L00000000001$pb":
testl %eax, %eax
jle L6
subl $1, %eax
movl %eax, 8(%ebp)
leave
jmp _a
.align 4,0x90
L6:
leal LC0-"L00000000001$pb"(%ecx), %eax
leave
ret
.align 4,0x90
.globl _a
_a:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
movl 8(%ebp), %eax
call ___i686.get_pc_thunk.cx
"L00000000002$pb":
testl %eax, %eax
jle L11
subl $1, %eax
movl %eax, 8(%ebp)
leave
jmp _b
.align 4,0x90
L11:
leal LC0-"L00000000002$pb"(%ecx), %eax
leave
ret
.align 4,0x90
.globl _main
_main:
pushl %ebp
movl %esp, %ebp
subl $24, %esp
movl $10000000, (%esp)
call _a
movl %eax, (%esp)
call L_puts$stub
xorl %eax, %eax
leave
ret
.picsymbol_stub
L_puts$stub:
.indirect_symbol _puts
call LPC$1
LPC$1: popl %eax
movl L1$lz-LPC$1(%eax),%edx
jmp *%edx
L_puts$stub_binder:
lea L1$lz-LPC$1(%eax),%eax
pushl %eax
jmp dyld_stub_binding_helper
.lazy_symbol_pointer
L1$lz:
.indirect_symbol _puts
.long L_puts$stub_binder
.subsections_via_symbols
.section __TEXT,__textcoal_nt,coalesced,pure_instructions
.weak_definition ___i686.get_pc_thunk.cx
.private_extern ___i686.get_pc_thunk.cx
___i686.get_pc_thunk.cx:
movl (%esp), %ecx
ret
$
The results with -m64 are similar.
More information about the llvm-dev
mailing list