[LLVMdev] Tail Call Optimisation

Jeffrey Yasskin jyasskin at google.com
Mon Jan 4 10:06:37 PST 2010


On Mon, Jan 4, 2010 at 5:03 AM, Jon Harrop <jon at ffconsultancy.com> wrote:
> On Monday 04 January 2010 05:16:40 Jeffrey Yasskin wrote:
>> On Sun, Jan 3, 2010 at 10:50 PM, Jon Harrop <jon at ffconsultancy.com> wrote:
>> > LLVM's TCO already handles mutual recursion.
>>
>> Only for fastcc functions
>
> Yes.
>
>> compiled with -tailcallopt, right?
>
> If you use the compiler, yes.
>
>> http://llvm.org/docs/CodeGenerator.html#tailcallopt
>>
>> I believe gcc manages to support tail calls in many more cases, and
>> this restriction in llvm confuses lots of newcomers. It would be very
>> worthwhile if someone wanted to remove it.
>
> That's interesting. What tail calls can be supported without changing the
> calling convention

Simon's original example, for one. See below for the C and assembly at
gcc -O3. Not all tail calls can be supported, and I can't find a
recent authoritative list of the exact restrictions, but
http://www.complang.tuwien.ac.at/schani/diplarb.ps has a list from
2001.

> and would it not simply be easier to switch to the fastcc
> convention between internal functions to achieve the same effect from outside
> LLVM?

Possibly, but not all functions can be internal.

> Conversely, if TCO is implemented for the cc convention, what will be
> the point of fastcc?

I don't think we should be omitting optimizations because they might
make a calling convention obsolete. (Although in this case, there are
still some extra calls a different calling convention can make tail
calls, and fastcc still improves the x86-32 call sequence.)



$ cat test.c
#include <stdio.h>

char* a(int n) __attribute__((noinline));
char* b(int n) __attribute__((noinline));

char* a(int n) {
  if (n <= 0) {
    return "DONE";
  } else {
    return b(n - 1);
  }
}

char* b(int n) {
  if (n <= 0) {
    return "DONE";
  } else {
    return a(n - 1);
  }
}

int main() {
  puts(a(10000000));
  return 0;
}
$ gcc -v
Using built-in specs.
Target: i386-apple-darwin9
Configured with: ../gcc-4.4.1/configure --prefix=/opt/local
--build=i386-apple-darwin9
--enable-languages=c,c++,objc,obj-c++,java,fortran
--libdir=/opt/local/lib/gcc44 --includedir=/opt/local/include/gcc44
--infodir=/opt/local/share/info --mandir=/opt/local/share/man
--with-local-prefix=/opt/local --with-system-zlib --disable-nls
--program-suffix=-mp-4.4
--with-gxx-include-dir=/opt/local/include/gcc44/c++/
--with-gmp=/opt/local --with-mpfr=/opt/local
Thread model: posix
gcc version 4.4.1 (GCC)
$ gcc -Wall -O3 test.c -o test.s -S
$ cat test.s
	.cstring
LC0:
	.ascii "DONE\0"
	.text
	.align 4,0x90
.globl _b
_b:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$8, %esp
	movl	8(%ebp), %eax
	call	___i686.get_pc_thunk.cx
"L00000000001$pb":
	testl	%eax, %eax
	jle	L6
	subl	$1, %eax
	movl	%eax, 8(%ebp)
	leave
	jmp	_a
	.align 4,0x90
L6:
	leal	LC0-"L00000000001$pb"(%ecx), %eax
	leave
	ret
	.align 4,0x90
.globl _a
_a:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$8, %esp
	movl	8(%ebp), %eax
	call	___i686.get_pc_thunk.cx
"L00000000002$pb":
	testl	%eax, %eax
	jle	L11
	subl	$1, %eax
	movl	%eax, 8(%ebp)
	leave
	jmp	_b
	.align 4,0x90
L11:
	leal	LC0-"L00000000002$pb"(%ecx), %eax
	leave
	ret
	.align 4,0x90
.globl _main
_main:
	pushl	%ebp
	movl	%esp, %ebp
	subl	$24, %esp
	movl	$10000000, (%esp)
	call	_a
	movl	%eax, (%esp)
	call	L_puts$stub
	xorl	%eax, %eax
	leave
	ret
	.picsymbol_stub
L_puts$stub:
	.indirect_symbol _puts
	call	LPC$1
LPC$1:	popl	%eax
	movl	L1$lz-LPC$1(%eax),%edx
	jmp	*%edx
L_puts$stub_binder:
	lea	L1$lz-LPC$1(%eax),%eax
	pushl	%eax
	jmp	dyld_stub_binding_helper
	.lazy_symbol_pointer
L1$lz:
	.indirect_symbol _puts
	.long L_puts$stub_binder
	.subsections_via_symbols
	.section __TEXT,__textcoal_nt,coalesced,pure_instructions
	.weak_definition	___i686.get_pc_thunk.cx
	.private_extern	___i686.get_pc_thunk.cx
___i686.get_pc_thunk.cx:
	movl	(%esp), %ecx
	ret
$

The results with -m64 are similar.



More information about the llvm-dev mailing list