<html>
<head>
<base href="http://llvm.org/bugs/" />
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - Poor register allocation compiling GCC inline assembly (x86)"
href="http://llvm.org/bugs/show_bug.cgi?id=16327">16327</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Poor register allocation compiling GCC inline assembly (x86)
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>lennox@cs.columbia.edu
</td>
</tr>
<tr>
<th>CC</th>
<td>llvmbugs@cs.uiuc.edu
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<p>
<div>
<pre>Created <span class=""><a href="attachment.cgi?id=10679" name="attach_10679" title="File using GCC inline assembly with lots of inputs">attachment 10679</a> <a href="attachment.cgi?id=10679&action=edit" title="File using GCC inline assembly with lots of inputs">[details]</a></span>
File using GCC inline assembly with lots of inputs
(Note: this is similar to <a class="bz_bug_link
bz_status_NEW "
title="NEW --- - Bad register allocation compiling GCC inline assembly (ARM)"
href="show_bug.cgi?id=16326">bug 16326</a>, but for the x86 backend.)
The register allocation for the constraints of GCC inline assembly is very poor,
leading to very inefficient code that shuffles values entirely redundantly on and
off the stack, or between the stack and variables.
Consider reg-overload-i386.c (attached).
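
The attached file is not reproduced in this mail; as a rough sketch only, a test
case in the same spirit looks something like the following (the function name,
argument count, and exact constraint letters here are guesses, not the actual
contents of reg-overload-i386.c):

/* Hypothetical sketch: one asm statement with a pointer input and several
   integer inputs.  The "g" constraints let the compiler pass each argument
   to the asm either in a register or directly from its memory slot. */
void foo(int *p, int a, int b, int c, int d, int e, int f, int g)
{
    __asm__ volatile(
        "mov (%0), %%ecx\n\t"   /* accumulator starts at *p     */
        "add %1, %%ecx\n\t"
        "add %2, %%ecx\n\t"
        "add %3, %%ecx\n\t"
        "add %4, %%ecx\n\t"
        "add %5, %%ecx\n\t"
        "add %6, %%ecx\n\t"
        "add %7, %%ecx\n\t"
        "mov %%ecx, (%0)"       /* store the sum back through p */
        : /* no outputs */
        : "r"(p), "g"(a), "g"(b), "g"(c), "g"(d), "g"(e), "g"(f), "g"(g)
        : "ecx", "memory", "cc");
}
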
For 32-bit mode, LLVM generates the following code. Notice how values are
shuffled off the stack and then straight back onto it before the InlineAsm Start
marker:

_foo:                                   ## @foo
## BB#0:                                ## %entry
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        subl    $32, %esp
        movl    8(%ebp), %eax
        movl    %eax, -12(%ebp)
        movl    12(%ebp), %eax
        movl    %eax, -16(%ebp)
        movl    16(%ebp), %eax
        movl    %eax, -20(%ebp)
        movl    20(%ebp), %eax
        movl    %eax, -24(%ebp)
        movl    24(%ebp), %eax
        movl    %eax, -28(%ebp)
        movl    28(%ebp), %eax
        movl    %eax, -32(%ebp)
        movl    32(%ebp), %eax
        movl    %eax, -36(%ebp)
        movl    36(%ebp), %eax
        movl    %eax, -40(%ebp)
        ## InlineAsm Start
        mov     -12(%ebp), %eax
        mov     (%eax), %ecx
        mov     -16(%ebp), %edx
        mov     -20(%ebp), %edi
        mov     -24(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -28(%ebp), %edx
        mov     -32(%ebp), %edi
        mov     -36(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -40(%ebp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        ## InlineAsm End
        addl    $32, %esp
        popl    %esi
        popl    %edi
        popl    %ebp
        ret

gcc-4.2, by contrast, directly passes the function arguments to the inline
assembly:

        .text
        .align 4,0x90
        .globl _foo
_foo:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $8, %esp
        movl    %esi, (%esp)
        movl    %edi, 4(%esp)
        mov     8(%ebp), %eax
        mov     (%eax), %ecx
        mov     12(%ebp), %edx
        mov     16(%ebp), %edi
        mov     20(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     24(%ebp), %edx
        mov     28(%ebp), %edi
        mov     32(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     36(%ebp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        movl    (%esp), %esi
        movl    4(%esp), %edi
        leave
        ret

Similarly, on x86_64, clang shuffles the function arguments from registers onto
the stack:

_foo:                                   ## @foo
        .cfi_startproc
## BB#0:                                ## %entry
        pushq   %rbp
Ltmp2:
        .cfi_def_cfa_offset 16
Ltmp3:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
Ltmp4:
        .cfi_def_cfa_register %rbp
        movq    %rdi, -8(%rbp)
        movl    %esi, -12(%rbp)
        movl    %edx, -16(%rbp)
        movl    %ecx, -20(%rbp)
        movl    %r8d, -24(%rbp)
        movl    %r9d, -28(%rbp)
        movl    24(%rbp), %eax
        movl    16(%rbp), %ecx
        movl    %ecx, -32(%rbp)
        movl    %eax, -36(%rbp)
        ## InlineAsm Start
        mov     -8(%rbp), %eax
        mov     (%eax), %ecx
        mov     -12(%rbp), %edx
        mov     -16(%rbp), %edi
        mov     -20(%rbp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -24(%rbp), %edx
        mov     -28(%rbp), %edi
        mov     -32(%rbp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -36(%rbp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        ## InlineAsm End
        popq    %rbp
        ret
        .cfi_endproc

gcc, by contrast, uses the registers directly:

_foo:
LFB2:
        pushq   %rbp
LCFI0:
        movq    %rsp, %rbp
LCFI1:
        movq    %rbx, -16(%rbp)
LCFI2:
        movq    %r12, -8(%rbp)
LCFI3:
        movq    %rdi, %r12
        movl    %esi, %ebx
        movl    %edx, %r11d
        movl    %ecx, %r10d
        mov     %r12, %eax
        mov     (%eax), %ecx
        mov     %ebx, %edx
        mov     %r11d, %edi
        mov     %r10d, %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     %r8d, %edx
        mov     %r9d, %edi
        mov     16(%rbp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     24(%rbp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        movq    -16(%rbp), %rbx
        movq    -8(%rbp), %r12
        leave
        ret

This is clang tip:
$ clang -v
clang version 3.4 (trunk 183951) (llvm/trunk 183950)
Target: x86_64-apple-darwin12.4.0
Thread model: posix</pre>
</div>
</p>
</body>
</html>