<html>
    <head>
      <base href="https://bugs.llvm.org/">
    </head>
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - Missed optimization: no optimization over loop boundaries"
   href="https://bugs.llvm.org/show_bug.cgi?id=32523">32523</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>Missed optimization: no optimization over loop boundaries
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>Polly
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>unspecified
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>Linux
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Optimizer
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>polly-dev@googlegroups.com
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>antoshkka@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Consider the following example:

///////////////
using size_t = unsigned long;

template &lt;class T&gt;
struct shared_ptr_like {
    T* ptr;
    void foo() noexcept { ptr = 0; }
    void bar() noexcept { if (ptr) { delete ptr; } }
};

typedef shared_ptr_like&lt;int&gt; test_type;
void relocate(test_type* new_buffer, size_t size) {
    for (size_t i = 0; i != size; ++i) {
        new_buffer[i].foo();
    }
    for (size_t i = 0; i != size; ++i) {
        new_buffer[i].bar();
    }
}
///////////////

It produces the following assembly:

relocate(shared_ptr_like&lt;int&gt;*, unsigned long):      #
@relocate(shared_ptr_like&lt;int&gt;*, unsigned long)
        push    r14
        push    rbx
        push    rax
        mov     r14, rsi
        mov     rbx, rdi
        test    r14, r14
        je      .LBB0_5
        lea     rdx, [8*r14]
        xor     esi, esi
        mov     rdi, rbx
        call    memset
.LBB0_2:                                # =>This Inner Loop Header: Depth=1
        mov     rdi, qword ptr [rbx]
        test    rdi, rdi
        je      .LBB0_4
        call    operator delete(void*)
.LBB0_4:                                #   in Loop: Header=BB0_2 Depth=1
        add     rbx, 8
        dec     r14
        jne     .LBB0_2
.LBB0_5:
        add     rsp, 8
        pop     rbx
        pop     r14
        ret

However the optimal assembly would be:

relocate(shared_ptr_like&lt;int&gt;*, unsigned long):      #
@relocate(shared_ptr_like&lt;int&gt;*, unsigned long)
        mov     rax, rsi
        test    rax, rax
        je      .LBB0_2
        push    rax
        shl     rax, 3
        xor     esi, esi
        mov     rdx, rax
        call    memset
        add     rsp, 8
.LBB0_2:
        ret

The assembly above could be produced by the following code, which has just a single
loop instead of two:

typedef shared_ptr_like&lt;int&gt; test_type;
void relocate(test_type* new_buffer, size_t size) {
    for (size_t i = 0; i != size; ++i) { // Single loop instead of two
        new_buffer[i].foo();
        new_buffer[i].bar();
    }
}


Optimizing over loop boundaries is essential because multiple parts of the C++
standard library traverse the same data twice. For example, GCC's
std::vector::reserve has the following code:

  pointer __tmp = _M_allocate_and_copy(__n,
    _GLIBCXX_MAKE_MOVE_IF_NOEXCEPT_ITERATOR(this->_M_impl._M_start),
    _GLIBCXX_MAKE_MOVE_IF_NOEXCEPT_ITERATOR(this->_M_impl._M_finish));
  std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish,
                _M_get_Tp_allocator()); 



Checked on CLANG 4.0 (tags/RELEASE_400/final 297782) with flags -std=c++1z -O2</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>