[llvm-bugs] [Bug 43899] New: Incomplete optimization during loop vectorization on large arrays.

Mon Nov 4 09:34:25 PST 2019

https://bugs.llvm.org/show_bug.cgi?id=43899

            Bug ID: 43899
           Summary: Incomplete optimization during loop vectorization on
                    large arrays.
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: a.rainman at gmail.com
                CC: llvm-bugs at lists.llvm.org

I am using x86-64 clang (trunk or version 9.0) with -Ofast.

The example functions below generate different code for the same array and
loop size; this is a bug because the loops are identical.

To compare the generated assembly, I created this example: https://godbolt.org/z/YgKRZO

#include <cstddef>
#include <cstdint>
#include <array>

// Built-in array of 1024 * 1024 * 1024 (2^30) int64_t elements.
typedef
    int64_t
        my_c_arr[1024 * 1024 * 1024];

// std::array with the same element type and element count as my_c_arr.
typedef
    std::array<int64_t, 1024 * 1024 * 1024>
        my_arr;

// Range-for over the built-in array.
// `i` is a by-value copy of each element (its VALUE, not its position), and
// that value is then used as the subscript into `input`.
// NOTE(review): this is not the same loop as the index-based compute_10 /
// compute_11, which visit positions 0..N-1; presumably an in-place update
// (`for (auto& v : input) v = ...`) was intended — confirm with the reporter.
void compute_1(my_c_arr& input)
{
    for (auto i: input)
    {
        input[i] = (input[i] + 3254) * 3;
    }
}

// Same loop body as compute_1, but over the std::array typedef.
// `i` is a by-value copy of each element, and that value is used as the
// subscript into `input`.
// NOTE(review): elements are used as indices here, unlike the index-based
// compute_10 / compute_11 — presumably unintended; confirm with the reporter.
void compute_2(my_arr& input)
{
    for (auto i: input)
    {
        input[i] = (input[i] + 3254) * 3;
    }
}

// Explicit iterator loop over the std::array: every element is rewritten in
// place through the iterator as (value + 3254) * 3.
// The loop starts at begin() but is bounded by cend(), so the comparison is
// between a mutable iterator and a const iterator.
void compute_3(my_arr& input)
{
    for (auto i = input.begin(); i != input.cend(); ++i)
    {
        *i = (*i + 3254) * 3;
    }
}

All of the compute_1... functions generate this asm:

compute_1(long (&) [1073741824]):         # @compute_1(long (&) [1073741824])
        movabs  rax, 8589934592
        add     rax, rdi
        mov     rcx, rdi
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        mov     rdx, qword ptr [rcx]
        mov     rsi, qword ptr [rdi + 8*rdx]
        lea     rsi, [rsi + 2*rsi]
        add     rsi, 9762
        mov     qword ptr [rdi + 8*rdx], rsi
        mov     rdx, qword ptr [rcx + 8]
        mov     rsi, qword ptr [rdi + 8*rdx]
        lea     rsi, [rsi + 2*rsi + 9762]
        mov     qword ptr [rdi + 8*rdx], rsi
        mov     rdx, qword ptr [rcx + 16]
        mov     rsi, qword ptr [rdi + 8*rdx]
        lea     rsi, [rsi + 2*rsi + 9762]
        mov     qword ptr [rdi + 8*rdx], rsi
        mov     rdx, qword ptr [rcx + 24]
        mov     rsi, qword ptr [rdi + 8*rdx]
        lea     rsi, [rsi + 2*rsi]
        add     rsi, 9762
        mov     qword ptr [rdi + 8*rdx], rsi
        add     rcx, 32
        cmp     rcx, rax
        jne     .LBB0_1
        ret

// Index-based loop over the built-in array: each position i in
// [0, element count) is rewritten in place as (input[i] + 3254) * 3.
// `auto i = 0` deduces `int`, while the bound sizeof(input) / sizeof(input[0])
// is a size_t, so the loop condition compares a signed with an unsigned value.
void compute_10(my_c_arr& input)
{
    for (auto i = 0; i != sizeof(input) / sizeof(input[0]); ++i)
    {
        input[i] = (input[i] + 3254) * 3;
    }
}

// Index-based loop over the std::array: each position i in [0, input.size())
// is rewritten in place as (input[i] + 3254) * 3.
// `auto i = 0` deduces `int`, while input.size() returns an unsigned size
// type, so the loop condition compares a signed with an unsigned value.
void compute_11(my_arr& input)
{
    for (auto i = 0; i != input.size(); ++i)
    {
        input[i] = (input[i] + 3254) * 3;
    }
}

All of the compute_1*... functions generate this asm code:

compute_10(long (&) [1073741824]):       # @compute_10(long (&) [1073741824])
        xor     eax, eax
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        mov     rcx, qword ptr [rdi + 8*rax]
        mov     rdx, qword ptr [rdi + 8*rax + 8]
        lea     rcx, [rcx + 2*rcx]
        add     rcx, 9762
        mov     qword ptr [rdi + 8*rax], rcx
        lea     rcx, [rdx + 2*rdx + 9762]
        mov     qword ptr [rdi + 8*rax + 8], rcx
        mov     rcx, qword ptr [rdi + 8*rax + 16]
        lea     rcx, [rcx + 2*rcx + 9762]
        mov     qword ptr [rdi + 8*rax + 16], rcx
        mov     rcx, qword ptr [rdi + 8*rax + 24]
        lea     rcx, [rcx + 2*rcx]
        add     rcx, 9762
        mov     qword ptr [rdi + 8*rax + 24], rcx
        add     rax, 4
        cmp     rax, 1073741824
        jne     .LBB0_1
        ret

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191104/e4d00196/attachment-0001.html>