<html>
    <head>
      <base href="https://llvm.org/bugs/" />
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - Regression: unnecessary mov in a tight loop"
   href="https://llvm.org/bugs/show_bug.cgi?id=30339">30339</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Regression: unnecessary mov in a tight loop
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>new-bugs
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>new bugs
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>krasin@google.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Created <span class=""><a href="attachment.cgi?id=17231" name="attach_17231" title="loop.cc">attachment 17231</a> <a href="attachment.cgi?id=17231&amp;action=edit" title="loop.cc">[details]</a></span>
loop.cc

Recently, Chrome observed a significant (~15%) regression after rolling the new
version of Clang (r280106, trunk at the time), see <a href="https://crbug.com/643724">https://crbug.com/643724</a>.

A minimal repro has been extracted (attached). To reproduce the issue:

clang++ -o loop loop.cc -fuse-ld=gold -O2  -flto 

Note that using LTO seems to be the trigger for the bug (but not necessarily
the reason).

Before regression, r278861:

00000000004006e0 &lt;_Z31absoluteColumnToEffectiveColumnj&gt;:
  4006e0:       48 8b 15 61 19 00 00    mov    0x1961(%rip),%rdx        #
402048 &lt;m_effectiveColumns+0x8&gt;
  4006e7:       4c 8b 05 52 19 00 00    mov    0x1952(%rip),%r8        # 402040
&lt;m_effectiveColumns&gt;
  4006ee:       4c 29 c2                sub    %r8,%rdx
  4006f1:       48 c1 ea 02             shr    $0x2,%rdx
  4006f5:       31 c0                   xor    %eax,%eax
  4006f7:       85 d2                   test   %edx,%edx
  4006f9:       74 2e                   je     400729
&lt;_Z31absoluteColumnToEffectiveColumnj+0x49&gt;
  4006fb:       89 d2                   mov    %edx,%edx
  4006fd:       31 c0                   xor    %eax,%eax
  4006ff:       31 f6                   xor    %esi,%esi
  400701:       66 66 66 66 66 66 2e    data32 data32 data32 data32 data32 nopw
%cs:0x0(%rax,%rax,1)
  400708:       0f 1f 84 00 00 00 00 
  40070f:       00 
  400710:       41 8b 3c 80             mov    (%r8,%rax,4),%edi #
&lt;=================== Everything is good here
  400714:       8d 4c 37 ff             lea    -0x1(%rdi,%rsi,1),%ecx
  400718:       83 f9 0a                cmp    $0xa,%ecx
  40071b:       73 0c                   jae    400729
&lt;_Z31absoluteColumnToEffectiveColumnj+0x49&gt;
  40071d:       01 f7                   add    %esi,%edi
  40071f:       48 ff c0                inc    %rax
  400722:       48 39 d0                cmp    %rdx,%rax
  400725:       89 fe                   mov    %edi,%esi
  400727:       72 e7                   jb     400710
&lt;_Z31absoluteColumnToEffectiveColumnj+0x30&gt;
  400729:       c3                      retq   
  40072a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

After regression, r280106:

00000000004006e0 &lt;_Z31absoluteColumnToEffectiveColumnj&gt;:
  4006e0:       48 8b 0d 61 19 00 00    mov    0x1961(%rip),%rcx        #
402048 &lt;m_effectiveColumns+0x8&gt;
  4006e7:       4c 8b 05 52 19 00 00    mov    0x1952(%rip),%r8        # 402040
&lt;m_effectiveColumns&gt;
  4006ee:       4c 29 c1                sub    %r8,%rcx
  4006f1:       48 c1 e9 02             shr    $0x2,%rcx
  4006f5:       31 c0                   xor    %eax,%eax
  4006f7:       85 c9                   test   %ecx,%ecx
  4006f9:       74 1e                   je     400719
&lt;_Z31absoluteColumnToEffectiveColumnj+0x39&gt;
  4006fb:       31 f6                   xor    %esi,%esi
  4006fd:       31 c0                   xor    %eax,%eax
  4006ff:       90                      nop
  400700:       89 c7                   mov    %eax,%edi #
&lt;============================== unnecessary mov
  400702:       41 8b 3c b8             mov    (%r8,%rdi,4),%edi
  400706:       8d 54 37 ff             lea    -0x1(%rdi,%rsi,1),%edx
  40070a:       83 fa 0a                cmp    $0xa,%edx
  40070d:       73 0a                   jae    400719
&lt;_Z31absoluteColumnToEffectiveColumnj+0x39&gt;
  40070f:       01 f7                   add    %esi,%edi
  400711:       ff c0                   inc    %eax
  400713:       39 c8                   cmp    %ecx,%eax
  400715:       89 fe                   mov    %edi,%esi
  400717:       72 e7                   jb     400700
&lt;_Z31absoluteColumnToEffectiveColumnj+0x20&gt;
  400719:       c3                      retq   
  40071a:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)


As you can see, there's an unnecessary mov in the newer code.

I have also verified that the issue persists in the current trunk (r280929):

00000000004006c0 &lt;_Z31absoluteColumnToEffectiveColumnj&gt;:
  4006c0:       48 8b 0d 81 19 00 00    mov    0x1981(%rip),%rcx        #
402048 &lt;m_effectiveColumns+0x8&gt;
  4006c7:       4c 8b 05 72 19 00 00    mov    0x1972(%rip),%r8        # 402040
&lt;m_effectiveColumns&gt;
  4006ce:       4c 29 c1                sub    %r8,%rcx
  4006d1:       48 c1 e9 02             shr    $0x2,%rcx
  4006d5:       31 c0                   xor    %eax,%eax
  4006d7:       85 c9                   test   %ecx,%ecx
  4006d9:       74 1e                   je     4006f9
&lt;_Z31absoluteColumnToEffectiveColumnj+0x39&gt;
  4006db:       31 f6                   xor    %esi,%esi
  4006dd:       31 c0                   xor    %eax,%eax
  4006df:       90                      nop
  4006e0:       89 c7                   mov    %eax,%edi #
&lt;================================ unnecessary mov
  4006e2:       41 8b 3c b8             mov    (%r8,%rdi,4),%edi
  4006e6:       8d 54 37 ff             lea    -0x1(%rdi,%rsi,1),%edx
  4006ea:       83 fa 0a                cmp    $0xa,%edx
  4006ed:       73 0a                   jae    4006f9
&lt;_Z31absoluteColumnToEffectiveColumnj+0x39&gt;
  4006ef:       01 f7                   add    %esi,%edi
  4006f1:       ff c0                   inc    %eax
  4006f3:       39 c8                   cmp    %ecx,%eax
  4006f5:       89 fe                   mov    %edi,%esi
  4006f7:       72 e7                   jb     4006e0
&lt;_Z31absoluteColumnToEffectiveColumnj+0x20&gt;
  4006f9:       c3                      retq   
  4006fa:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>