<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Suboptimal codegen for binary search"
   href="https://bugs.llvm.org/show_bug.cgi?id=43361">43361</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Suboptimal codegen for binary search
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Scalar Optimizations
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>david.bolvansky@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Created <span class=""><a href="attachment.cgi?id=22529" name="attach_22529" title="microbenchmark">attachment 22529</a> <a href="attachment.cgi?id=22529&action=edit" title="microbenchmark">[details]</a></span>
microbenchmark

Consider
int bs(int a[], int low, int high, int find)
{
   int middle;
   while( low <= high )
   {
      middle = ( low + high ) / 2;

      if ( find == a[middle])
         return middle;

      else if ( find < a[middle])
         high = middle - 1;

      else
      // Variant2:
      // else if ( find > a[middle])
         low = middle + 1;
   }

   return -1;
}

bs(int*, int, int, int):                             # @bs(int*, int, int, int)
        mov     eax, -1
        cmp     esi, edx
        jle     .LBB0_2
.LBB0_4:
        ret
.LBB0_7:                                #   in Loop: Header=BB0_2 Depth=1
        add     r8d, 1
        mov     esi, r8d
        cmp     esi, edx
        jg      .LBB0_4
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        lea     r9d, [rsi + rdx]
        mov     r8d, r9d
        shr     r8d, 31
        add     r8d, r9d
        sar     r8d
        movsxd  r9, r8d
        mov     r9d, dword ptr [rdi + 4*r9]
        cmp     r9d, ecx
        je      .LBB0_3
        cmp     r9d, ecx
        jle     .LBB0_7
        add     r8d, -1
        mov     edx, r8d
        cmp     esi, edx
        jle     .LBB0_2
        jmp     .LBB0_4
.LBB0_3:
        mov     eax, r8d
        ret

1) why jmp .LBB0_4? just "ret" ?
2) this codegen looks very supoptimal

        cmp     r9d, ecx
        je      .LBB0_3
        cmp     r9d, ecx
        jle     .LBB0_7



gcc9 -O3
time ./a.out
Element is present at index 3
real    0m0,226s
user    0m0,221s
sys     0m0,005s



clang trunk -O3 
time ./a.out
Element is present at index 3
real    0m0,290s
user    0m0,286s
sys     0m0,005s

clang trunk -O3 Variant 2 (codegen is full of cmovs)
time ./a.out
Element is present at index 3
real    0m0,732s
user    0m0,728s
sys     0m0,004s

-mllvm -phi-node-folding-threshold=1 - No changes in perf.
-mllvm -phi-node-folding-threshold=0 - No cmovs, better perf:
time ./a.out 
Element is present at index 3
real    0m0,293s
user    0m0,289s
sys     0m0,004s


Tested on Intel Haswell</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>