<html>
<head>
<base href="http://llvm.org/bugs/" />
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - Poor register allocation compiling GCC inline assembly (x86)"
href="http://llvm.org/bugs/show_bug.cgi?id=16327">16327</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Poor register allocation compiling GCC inline assembly (x86)
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>lennox@cs.columbia.edu
</td>
</tr>
<tr>
<th>CC</th>
<td>llvmbugs@cs.uiuc.edu
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<p>
<div>
<pre>Created <span class=""><a href="attachment.cgi?id=10679" name="attach_10679" title="File using GCC inline assembly with lots of inputs">attachment 10679</a> <a href="attachment.cgi?id=10679&action=edit" title="File using GCC inline assembly with lots of inputs">[details]</a></span>
File using GCC inline assembly with lots of inputs
(Note: this is similar to <a class="bz_bug_link
bz_status_NEW "
title="NEW --- - Bad register allocation compiling GCC inline assembly (ARM)"
href="show_bug.cgi?id=16326">bug 16326</a>, but for the x86 backend.)
The register allocation for the constraints of GCC inline assembly is very poor,
leading to very inefficient code that shuffles values entirely redundantly on and
off the stack, or between the stack and variables.
Consider reg-overload-i386.c (attached).
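
The attached file is not reproduced in this mail; as a rough sketch only, a test
case in the same spirit looks something like the following (the function name,
argument count, and exact constraint letters here are guesses, not the actual
contents of reg-overload-i386.c):

/* Hypothetical sketch: one asm statement with a pointer input and several
   integer inputs.  The "g" constraints let the compiler pass each argument
   to the asm either in a register or directly from its memory slot. */
void foo(int *p, int a, int b, int c, int d, int e, int f, int g)
{
    __asm__ volatile(
        "mov (%0), %%ecx\n\t"   /* accumulator starts at *p     */
        "add %1, %%ecx\n\t"
        "add %2, %%ecx\n\t"
        "add %3, %%ecx\n\t"
        "add %4, %%ecx\n\t"
        "add %5, %%ecx\n\t"
        "add %6, %%ecx\n\t"
        "add %7, %%ecx\n\t"
        "mov %%ecx, (%0)"       /* store the sum back through p */
        : /* no outputs */
        : "r"(p), "g"(a), "g"(b), "g"(c), "g"(d), "g"(e), "g"(f), "g"(g)
        : "ecx", "memory", "cc");
}
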
For 32-bit mode, LLVM generates the following code. Notice how values are
shuffled off the stack and then straight back onto it before the InlineAsm Start
marker:

_foo:                                   ## @foo
## BB#0:                                ## %entry
        pushl   %ebp
        movl    %esp, %ebp
        pushl   %edi
        pushl   %esi
        subl    $32, %esp
        movl    8(%ebp), %eax
        movl    %eax, -12(%ebp)
        movl    12(%ebp), %eax
        movl    %eax, -16(%ebp)
        movl    16(%ebp), %eax
        movl    %eax, -20(%ebp)
        movl    20(%ebp), %eax
        movl    %eax, -24(%ebp)
        movl    24(%ebp), %eax
        movl    %eax, -28(%ebp)
        movl    28(%ebp), %eax
        movl    %eax, -32(%ebp)
        movl    32(%ebp), %eax
        movl    %eax, -36(%ebp)
        movl    36(%ebp), %eax
        movl    %eax, -40(%ebp)
        ## InlineAsm Start
        mov     -12(%ebp), %eax
        mov     (%eax), %ecx
        mov     -16(%ebp), %edx
        mov     -20(%ebp), %edi
        mov     -24(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -28(%ebp), %edx
        mov     -32(%ebp), %edi
        mov     -36(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -40(%ebp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        ## InlineAsm End
        addl    $32, %esp
        popl    %esi
        popl    %edi
        popl    %ebp
        ret

gcc-4.2, by contrast, directly passes the function arguments to the inline
assembly:

        .text
        .align 4,0x90
        .globl _foo
_foo:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $8, %esp
        movl    %esi, (%esp)
        movl    %edi, 4(%esp)
        mov     8(%ebp), %eax
        mov     (%eax), %ecx
        mov     12(%ebp), %edx
        mov     16(%ebp), %edi
        mov     20(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     24(%ebp), %edx
        mov     28(%ebp), %edi
        mov     32(%ebp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     36(%ebp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        movl    (%esp), %esi
        movl    4(%esp), %edi
        leave
        ret

Similarly, on x86_64, clang shuffles the function arguments from registers onto
the stack:

_foo:                                   ## @foo
        .cfi_startproc
## BB#0:                                ## %entry
        pushq   %rbp
Ltmp2:
        .cfi_def_cfa_offset 16
Ltmp3:
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
Ltmp4:
        .cfi_def_cfa_register %rbp
        movq    %rdi, -8(%rbp)
        movl    %esi, -12(%rbp)
        movl    %edx, -16(%rbp)
        movl    %ecx, -20(%rbp)
        movl    %r8d, -24(%rbp)
        movl    %r9d, -28(%rbp)
        movl    24(%rbp), %eax
        movl    16(%rbp), %ecx
        movl    %ecx, -32(%rbp)
        movl    %eax, -36(%rbp)
        ## InlineAsm Start
        mov     -8(%rbp), %eax
        mov     (%eax), %ecx
        mov     -12(%rbp), %edx
        mov     -16(%rbp), %edi
        mov     -20(%rbp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -24(%rbp), %edx
        mov     -28(%rbp), %edi
        mov     -32(%rbp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     -36(%rbp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        ## InlineAsm End
        popq    %rbp
        ret
        .cfi_endproc

gcc, by contrast, uses the registers directly:

_foo:
LFB2:
        pushq   %rbp
LCFI0:
        movq    %rsp, %rbp
LCFI1:
        movq    %rbx, -16(%rbp)
LCFI2:
        movq    %r12, -8(%rbp)
LCFI3:
        movq    %rdi, %r12
        movl    %esi, %ebx
        movl    %edx, %r11d
        movl    %ecx, %r10d
        mov     %r12, %eax
        mov     (%eax), %ecx
        mov     %ebx, %edx
        mov     %r11d, %edi
        mov     %r10d, %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     %r8d, %edx
        mov     %r9d, %edi
        mov     16(%rbp), %esi
        add     %edx, %ecx
        add     %edi, %ecx
        add     %esi, %ecx
        mov     24(%rbp), %edx
        add     %edx, %ecx
        mov     %ecx, (%eax)
        movq    -16(%rbp), %rbx
        movq    -8(%rbp), %r12
        leave
        ret

This is clang tip:
$ clang -v
clang version 3.4 (trunk 183951) (llvm/trunk 183950)
Target: x86_64-apple-darwin12.4.0
Thread model: posix</pre>
</div>
</p>
</body>
</html>