[llvm-bugs] [Bug 43899] New: Incomplete optimization during loop vectorization on large arrays.
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Nov 4 09:34:25 PST 2019
https://bugs.llvm.org/show_bug.cgi?id=43899
Bug ID: 43899
Summary: Incomplete optimization during loop vectorization on
large arrays.
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Loop Optimizer
Assignee: unassignedbugs at nondot.org
Reporter: a.rainman at gmail.com
CC: llvm-bugs at lists.llvm.org
I use the x86-64 trunk or 9.0 version with -Ofast.
The example functions generate different code for the same array and loop size;
this is a bug because the loops are identical.
To compare the asm output I created this example: https://godbolt.org/z/YgKRZO
#include <cstddef>
#include <cstdint>
#include <array>
// C-style array type holding 1024 * 1024 * 1024 int64_t elements.
typedef
int64_t
my_c_arr[1024 * 1024 * 1024];
// std::array counterpart with the same element type and element count.
typedef
std::array<int64_t, 1024 * 1024 * 1024>
my_arr;
// Walks the C array `input` with a range-based for loop.
// `i` is a by-value copy of each element (not a position); that element value
// is then used as the subscript into `input`, and the slot it selects is
// rewritten as (old value + 3254) * 3.
// NOTE(review): this is a data-dependent (indirect) access, not the linear
// input[0], input[1], ... walk performed by compute_10 - confirm it is intended.
void compute_1(my_c_arr& input)
{
for (auto i: input)
{
// `i` holds an element value here and is used as an index.
input[i] = (input[i] + 3254) * 3;
}
}
// Same loop as compute_1, but over the std::array type `my_arr`.
// `i` is a by-value copy of each element (not a position); that element value
// is then used as the subscript into `input`, and the slot it selects is
// rewritten as (old value + 3254) * 3.
// NOTE(review): this is a data-dependent (indirect) access, not the linear
// walk performed by compute_11 - confirm it is intended.
void compute_2(my_arr& input)
{
for (auto i: input)
{
// `i` holds an element value here and is used as an index.
input[i] = (input[i] + 3254) * 3;
}
}
// Iterates over the std::array `input` with explicit iterators, from begin()
// up to (but not including) cend(), and rewrites every element in place as
// (element + 3254) * 3.
void compute_3(my_arr& input)
{
// `i` is the iterator returned by begin(); it is compared against cend().
for (auto i = input.begin(); i != input.cend(); ++i)
{
*i = (*i + 3254) * 3;
}
}
All of the compute_1... functions generate this asm:
# Compiler output quoted for compute_1. rdi is used as the array base address
# throughout. The loop body is repeated four times per iteration, and each
# copy performs an indirect access: an element is loaded through the cursor
# rcx, then used as a scaled index off rdi.
compute_1(long (&) [1073741824]): # @compute_1(long (&) [1073741824])
# rax = rdi + 8589934592 bytes (1073741824 elements * 8): the loop bound.
movabs rax, 8589934592
add rax, rdi
# rcx = cursor, starting at the array base.
mov rcx, rdi
.LBB0_1: # =>This Inner Loop Header: Depth=1
# rdx = element value at the cursor; it becomes the index below.
mov rdx, qword ptr [rcx]
mov rsi, qword ptr [rdi + 8*rdx]
# rsi = 3 * rsi, then + 9762 (= 3254 * 3), i.e. (x + 3254) * 3.
lea rsi, [rsi + 2*rsi]
add rsi, 9762
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 8]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi + 9762]
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 16]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi + 9762]
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 24]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi]
add rsi, 9762
mov qword ptr [rdi + 8*rdx], rsi
# Advance the cursor by 32 bytes (four 8-byte elements) and loop until it
# reaches the bound in rax.
add rcx, 32
cmp rcx, rax
jne .LBB0_1
ret
// Index-based loop over every element of the C array `input`; the bound is
// the element count computed as sizeof(input) / sizeof(input[0]).
// Each element is replaced in place by (element + 3254) * 3.
void compute_10(my_c_arr& input)
{
// `auto i = 0` deduces int, while the bound expression has type size_t, so
// the loop condition compares a signed value with an unsigned one.
for (auto i = 0; i != sizeof(input) / sizeof(input[0]); ++i)
{
input[i] = (input[i] + 3254) * 3;
}
}
// Index-based loop over every element of the std::array `input`, bounded by
// input.size(). Each element is replaced in place by (element + 3254) * 3.
void compute_11(my_arr& input)
{
// `auto i = 0` deduces int, while size() returns an unsigned size type, so
// the loop condition compares a signed value with an unsigned one.
for (auto i = 0; i != input.size(); ++i)
{
input[i] = (input[i] + 3254) * 3;
}
}
All of the compute_1* functions generate this asm code:
# Compiler output quoted for compute_10. rdi is used as the array base address
# and rax as the element index. Elements are addressed directly as
# [rdi + 8*rax + k], four per loop iteration, with scalar (non-vector)
# instructions.
compute_10(long (&) [1073741824]): # @compute_10(long (&) [1073741824])
# rax = 0: starting index.
xor eax, eax
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov rcx, qword ptr [rdi + 8*rax]
mov rdx, qword ptr [rdi + 8*rax + 8]
# rcx = 3 * rcx, then + 9762 (= 3254 * 3), i.e. (x + 3254) * 3.
lea rcx, [rcx + 2*rcx]
add rcx, 9762
mov qword ptr [rdi + 8*rax], rcx
lea rcx, [rdx + 2*rdx + 9762]
mov qword ptr [rdi + 8*rax + 8], rcx
mov rcx, qword ptr [rdi + 8*rax + 16]
lea rcx, [rcx + 2*rcx + 9762]
mov qword ptr [rdi + 8*rax + 16], rcx
mov rcx, qword ptr [rdi + 8*rax + 24]
lea rcx, [rcx + 2*rcx]
add rcx, 9762
mov qword ptr [rdi + 8*rax + 24], rcx
# Advance the index by four elements and loop until all 1073741824 are done.
add rax, 4
cmp rax, 1073741824
jne .LBB0_1
ret
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191104/e4d00196/attachment-0001.html>
More information about the llvm-bugs
mailing list