[llvm-bugs] [Bug 43361] New: Suboptimal codegen for binary search

via llvm-bugs llvm-bugs at lists.llvm.org
Thu Sep 19 08:03:27 PDT 2019


https://bugs.llvm.org/show_bug.cgi?id=43361

            Bug ID: 43361
           Summary: Suboptimal codegen for binary search
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: llvm-bugs at lists.llvm.org

Created attachment 22529
  --> https://bugs.llvm.org/attachment.cgi?id=22529&action=edit
microbenchmark

Consider
// Binary search: looks for `find` in a[low..high] (both bounds inclusive).
// Returns the index of a matching element, or -1 once the range is empty.
// NOTE(review): presumably a[] is sorted in ascending order; the halving
// below only makes sense under that assumption.
int bs(int a[], int low, int high, int find)
{
   int middle;
   while( low <= high )
   {
      // Midpoint via signed division by 2.
      // NOTE(review): low + high can overflow int for very large indices.
      middle = ( low + high ) / 2;

      if ( find == a[middle])
         return middle;

      else if ( find < a[middle])
         high = middle - 1;   // keep searching below middle

      else
      // Variant2:
      // else if ( find > a[middle])
         low = middle + 1;    // keep searching above middle
   }

   return -1;   // range exhausted without a match
}

# clang -O3 output for bs(), x86-64, Intel syntax.
# Register roles as used below (matches the SysV AMD64 argument order):
#   rdi = a, esi = low, edx = high, ecx = find
#   r8d = middle, r9d = scratch (low + high, then a[middle])
#   eax = return value (preloaded with -1)
bs(int*, int, int, int):                             # @bs(int*, int, int, int)
        mov     eax, -1                 # default result: -1 (not found)
        cmp     esi, edx
        jle     .LBB0_2                 # enter the loop only if low <= high
.LBB0_4:
        ret                             # shared exit: returns eax = -1
.LBB0_7:                                #   in Loop: Header=BB0_2 Depth=1
        add     r8d, 1
        mov     esi, r8d                # low = middle + 1
        cmp     esi, edx
        jg      .LBB0_4                 # low > high: leave the loop
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        lea     r9d, [rsi + rdx]        # r9d = low + high
        mov     r8d, r9d
        shr     r8d, 31                 # r8d = sign bit of (low + high)
        add     r8d, r9d                # bias negative sums so the shift rounds toward zero
        sar     r8d                     # r8d = middle = (low + high) / 2
        movsxd  r9, r8d                 # sign-extend middle for use as an index
        mov     r9d, dword ptr [rdi + 4*r9]  # r9d = a[middle]
        cmp     r9d, ecx
        je      .LBB0_3                 # a[middle] == find: found
        cmp     r9d, ecx                # same operands as the cmp above; je left the flags untouched
        jle     .LBB0_7                 # a[middle] < find (equality already handled): raise low
        add     r8d, -1
        mov     edx, r8d                # high = middle - 1
        cmp     esi, edx
        jle     .LBB0_2                 # low <= high: next iteration
        jmp     .LBB0_4                 # jumps to a block that is only a ret
.LBB0_3:
        mov     eax, r8d                # return middle
        ret

1) Why is there a "jmp .LBB0_4" when the target is just a "ret"? A plain "ret" could be emitted instead.
2) This codegen looks very suboptimal (the second cmp repeats the first):

        cmp     r9d, ecx
        je      .LBB0_3
        cmp     r9d, ecx
        jle     .LBB0_7



gcc9 -O3
time ./a.out
Element is present at index 3
real    0m0,226s
user    0m0,221s
sys     0m0,005s



clang trunk -O3 
time ./a.out
Element is present at index 3
real    0m0,290s
user    0m0,286s
sys     0m0,005s

clang trunk -O3 Variant 2 (codegen is full of cmovs)
time ./a.out
Element is present at index 3
real    0m0,732s
user    0m0,728s
sys     0m0,004s

-mllvm -phi-node-folding-threshold=1 - No changes in perf.
-mllvm -phi-node-folding-threshold=0 - No cmovs, better perf:
time ./a.out 
Element is present at index 3
real    0m0,293s
user    0m0,289s
sys     0m0,004s


Tested on Intel Haswell

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190919/2ed1c265/attachment.html>


More information about the llvm-bugs mailing list