<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Missed optimization: no optimization over loop boundaries"

   href="https://bugs.llvm.org/show_bug.cgi?id=32523">32523</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Missed optimization: no optimization over loop boundaries

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>Polly

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>unspecified

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Optimizer

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>polly-dev@googlegroups.com

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>antoshkka@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Consider the following example:

///////////////

using size_t = unsigned long;

template &lt;class T&gt;

struct shared_ptr_like {

    T* ptr;

    void foo() noexcept { ptr = 0; }

    void bar() noexcept { if (ptr) { delete ptr; } }

};

typedef shared_ptr_like&lt;int&gt; test_type;

void relocate(test_type* new_buffer, size_t size) {

    for (size_t i = 0; i != size; ++i) {

        new_buffer[i].foo();

    }

    for (size_t i = 0; i != size; ++i) {

        new_buffer[i].bar();

    }

}

///////////////

It produces the following assembly:

relocate(shared_ptr_like&lt;int&gt;*, unsigned long):      #

@relocate(shared_ptr_like&lt;int&gt;*, unsigned long)

        push    r14

        push    rbx

        push    rax

        mov     r14, rsi

        mov     rbx, rdi

        test    r14, r14

        je      .LBB0_5

        lea     rdx, [8*r14]

        xor     esi, esi

        mov     rdi, rbx

        call    memset

.LBB0_2:                                # =>This Inner Loop Header: Depth=1

        mov     rdi, qword ptr [rbx]

        test    rdi, rdi

        je      .LBB0_4

        call    operator delete(void*)

.LBB0_4:                                #   in Loop: Header=BB0_2 Depth=1

        add     rbx, 8

        dec     r14

        jne     .LBB0_2

.LBB0_5:

        add     rsp, 8

        pop     rbx

        pop     r14

        ret

However, the optimal assembly would be:

relocate(shared_ptr_like&lt;int&gt;*, unsigned long):      #

@relocate(shared_ptr_like&lt;int&gt;*, unsigned long)

        mov     rax, rsi

        test    rax, rax

        je      .LBB0_2

        push    rax

        shl     rax, 3

        xor     esi, esi

        mov     rdx, rax

        call    memset

        add     rsp, 8

.LBB0_2:

        ret

The assembly above could be produced by the following code that just has a single

loop instead of two:

typedef shared_ptr_like&lt;int&gt; test_type;

void relocate(test_type* new_buffer, size_t size) {

    for (size_t i = 0; i != size; ++i) { // Single loop instead of two

        new_buffer[i].foo();

        new_buffer[i].bar();

    }

}

Optimizing over loop boundaries is essential because multiple parts of the C++

standard library traverse the same data twice. For example, GCC's

std::vector::reserve has the following code:

  pointer __tmp = _M_allocate_and_copy(__n,

    _GLIBCXX_MAKE_MOVE_IF_NOEXCEPT_ITERATOR(this->_M_impl._M_start),

    _GLIBCXX_MAKE_MOVE_IF_NOEXCEPT_ITERATOR(this->_M_impl._M_finish));

  std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish,

                _M_get_Tp_allocator()); 

Checked on CLANG 4.0 (tags/RELEASE_400/final 297782) with flags -std=c++1z -O2</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>