<!doctype html>
<html lang="en">
    <head>
      <!-- The character encoding is declared first so it is seen before any other head content. -->
      <meta charset="utf-8">
      <title>Bug 36811 - Inline assembly input operand inefficiency</title>
      <!-- Any relative URL in this document resolves against the LLVM Bugzilla host. -->
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Inline assembly input operand inefficiency"
   href="https://bugs.llvm.org/show_bug.cgi?id=36811">36811</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Inline assembly input operand inefficiency
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>6.0
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>nruslan_devel@yahoo.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>I have noticed that whenever "mr" is specified for an assembly input operand
which may accept both memory and register, clang/llvm does not seem to generate
efficient code.

(-O2 is used for all examples)

For example,

1.

unsigned long func(unsigned long x)
{
    unsigned long r;
    asm ("bsf %1, %0"
        : "=r" (r)
        : "mr" (x)
        : "cc");
    return r;
}

generates code which unnecessarily moves %rdi to memory

func:                                   # @func
    .cfi_startproc
# %bb.0:
    movq    %rdi, -8(%rsp)
    #APP
    bsfq    -8(%rsp), %rax
    #NO_APP
    retq

2. whereas, if we change "mr" to simply "r" (for x)

we get optimal code

func:                                   # @func
    .cfi_startproc
# %bb.0:
    #APP
    bsfq    %rdi, %rax
    #NO_APP
    retq


3. gcc generates optimal code in both cases

func:
.LFB0:
    .cfi_startproc
#APP
# 4 "1.c" 1
    bsf %rdi, %rax
# 0 "" 2
#NO_APP
    ret</pre>
        </div>
      </p>


      <!-- Notification footer: the rule separates the bug content from the list of reasons the recipient was sent this mail. -->
      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>