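<!doctype html>
<html lang="en">
    <head>
      <meta charset="utf-8">
      <title>[Bug 50171] Missed optimization to remove unnecessary branch from loop entry</title>
      <base href="https://bugs.llvm.org/">
    </head>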
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Missed optimization to remove unnecessary branch from loop entry"
   href="https://bugs.llvm.org/show_bug.cgi?id=50171">50171</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Missed optimization to remove unnecessary branch from loop entry
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>unspecified
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Windows NT
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>C++
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>scovich@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>blitzrakete@gmail.com, dgregor@apple.com, erik.pilkington@gmail.com, llvm-bugs@lists.llvm.org, richard-llvm@metafoo.co.uk
          </td>
        </tr></table>
      <p>
        <div>
        <pre>The following toy example:

  void Loop(int len) {
      int i = 0;
      const int kUnrollFactor = 8;
      for (int num_calls = 0; i &lt;= len - kUnrollFactor; ) {
          if (num_calls + kUnrollFactor > 100) {
              extern void Foo(); Foo();
              num_calls = 0;
          }

          for (int j = 0; j &lt; kUnrollFactor; j++, i++, num_calls++) {
              extern void Bar(int); Bar(i);
          }
      }
  }

Compiles to the following x86 assembly code with clang-9:

  Loop(int):
          ... prologue ...
        mov     r14d, edi
        add     r14d, -8
        js      .LBB0_5
        xor     r15d, r15d         &lt;=== num_calls = 0
        xor     ebx, ebx
        cmp     r15d, 93           &lt;=== num_calls still zero here
        jge     .LBB0_3            &lt;=== branch can NEVER be taken
  .LBB0_4:
          ... unrolled loop body with 8 calls to Bar() ...
        add     r15d, 8
        add     ebx, 8
        cmp     ebp, r14d
        jge     .LBB0_5
        cmp     r15d, 93
        jl      .LBB0_4
  .LBB0_3:
        call    Foo()
        xor     r15d, r15d
        jmp     .LBB0_4
  .LBB0_5:
          ... epilogue ...
        ret

Ideally, the compiler should elide the provably redundant cmp+jge pair from the
sequence below, leaving only the xor:

        xor     r15d, r15d
        cmp     r15d, 93
        jge     .LBB0_3


With clang-12, the result is arguably worse, because the extra branching masks
the missed opportunity altogether:

  Loop(int):
          ... prologue ...
        cmp     edi, 8
        jge     .LBB0_1
  .LBB0_5:
          ... epilogue ...
        ret
  .LBB0_1:
          ... loop initialization ...
        xor     r15d, r15d          &lt;=== num_calls = 0
        xor     ebx, ebx
        jmp     .LBB0_2
  .LBB0_4:
          ... unrolled loop body with 8 calls to Bar() ...
        add     r15d, 8
        add     ebx, 8
        cmp     ebp, r14d
        jge     .LBB0_5
  .LBB0_2:
        cmp     r15d, 93            &lt;=== num_calls = 0 the first time
        jl      .LBB0_4             &lt;=== branch always taken the first time (0 &lt; 93)
        call    Foo()
        xor     r15d, r15d
        jmp     .LBB0_4</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>