<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Incomplete optimization during loop vectorization on large arrays."

   href="https://bugs.llvm.org/show_bug.cgi?id=43899">43899</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Incomplete optimization during loop vectorization on large arrays.

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>All

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Loop Optimizer

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>a.rainman@gmail.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>I use x86-64 trunk or 9.0 version with -Ofast

The example functions create different code for the same array and loop size; this

is a bug because the loops are identical.

To compare the asm code I created this example: <a href="https://godbolt.org/z/YgKRZO">https://godbolt.org/z/YgKRZO</a>

#include &lt;cstddef&gt;

#include &lt;cstdint&gt;

#include &lt;array&gt;

typedef

    int64_t

        my_c_arr[1024 * 1024 * 1024];

typedef

    std::array&lt;int64_t, 1024 * 1024 * 1024&gt;

        my_arr;

void compute_1(my_c_arr& input)

{

    for (auto i: input)

    {

        input[i] = (input[i] + 3254) * 3;

    }

}

void compute_2(my_arr& input)

{

    for (auto i: input)

    {

        input[i] = (input[i] + 3254) * 3;

    }

}

void compute_3(my_arr& input)

{

    for (auto i = input.begin(); i != input.cend(); ++i)

    {

        *i = (*i + 3254) * 3;

    }

}

All of the compute_1... functions produce this asm:

compute_1(long (&) [1073741824]):         # @compute_1(long (&) [1073741824])

        movabs  rax, 8589934592

        add     rax, rdi

        mov     rcx, rdi

.LBB0_1:                                # =>This Inner Loop Header: Depth=1

        mov     rdx, qword ptr [rcx]

        mov     rsi, qword ptr [rdi + 8*rdx]

        lea     rsi, [rsi + 2*rsi]

        add     rsi, 9762

        mov     qword ptr [rdi + 8*rdx], rsi

        mov     rdx, qword ptr [rcx + 8]

        mov     rsi, qword ptr [rdi + 8*rdx]

        lea     rsi, [rsi + 2*rsi + 9762]

        mov     qword ptr [rdi + 8*rdx], rsi

        mov     rdx, qword ptr [rcx + 16]

        mov     rsi, qword ptr [rdi + 8*rdx]

        lea     rsi, [rsi + 2*rsi + 9762]

        mov     qword ptr [rdi + 8*rdx], rsi

        mov     rdx, qword ptr [rcx + 24]

        mov     rsi, qword ptr [rdi + 8*rdx]

        lea     rsi, [rsi + 2*rsi]

        add     rsi, 9762

        mov     qword ptr [rdi + 8*rdx], rsi

        add     rcx, 32

        cmp     rcx, rax

        jne     .LBB0_1

        ret

void compute_10(my_c_arr& input)

{

    for (auto i = 0; i != sizeof(input) / sizeof(input[0]); ++i)

    {

        input[i] = (input[i] + 3254) * 3;

    }

}

void compute_11(my_arr& input)

{

    for (auto i = 0; i != input.size(); ++i)

    {

        input[i] = (input[i] + 3254) * 3;

    }

}

All of the compute_1*... functions produce this asm code:

compute_10(long (&) [1073741824]):       # @compute_10(long (&) [1073741824])

        xor     eax, eax

.LBB0_1:                                # =>This Inner Loop Header: Depth=1

        mov     rcx, qword ptr [rdi + 8*rax]

        mov     rdx, qword ptr [rdi + 8*rax + 8]

        lea     rcx, [rcx + 2*rcx]

        add     rcx, 9762

        mov     qword ptr [rdi + 8*rax], rcx

        lea     rcx, [rdx + 2*rdx + 9762]

        mov     qword ptr [rdi + 8*rax + 8], rcx

        mov     rcx, qword ptr [rdi + 8*rax + 16]

        lea     rcx, [rcx + 2*rcx + 9762]

        mov     qword ptr [rdi + 8*rax + 16], rcx

        mov     rcx, qword ptr [rdi + 8*rax + 24]

        lea     rcx, [rcx + 2*rcx]

        add     rcx, 9762

        mov     qword ptr [rdi + 8*rax + 24], rcx

        add     rax, 4

        cmp     rax, 1073741824

        jne     .LBB0_1

        ret</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>