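<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 43899 - Incomplete optimization during loop vectorization on large arrays.</title>
<base href="https://bugs.llvm.org/">
</head>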
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Incomplete optimization during loop vectorization on large arrays."
href="https://bugs.llvm.org/show_bug.cgi?id=43899">43899</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Incomplete optimization during loop vectorization on large arrays.
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Loop Optimizer
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>a.rainman@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>I use the x86-64 trunk or 9.0 version with -Ofast.
The example functions produce different code for the same array and loop size; this
is a bug because the loops are identical.
To compare the asm output I created this example: <a href="https://godbolt.org/z/YgKRZO">https://godbolt.org/z/YgKRZO</a>
#include &lt;cstddef&gt;
#include &lt;cstdint&gt;
#include &lt;array&gt;
typedef
int64_t
my_c_arr[1024 * 1024 * 1024];
typedef
std::array&lt;int64_t, 1024 * 1024 * 1024&gt;
my_arr;
void compute_1(my_c_arr& input)
{
for (auto i: input)
{
input[i] = (input[i] + 3254) * 3;
}
}
void compute_2(my_arr& input)
{
for (auto i: input)
{
input[i] = (input[i] + 3254) * 3;
}
}
void compute_3(my_arr& input)
{
for (auto i = input.begin(); i != input.cend(); ++i)
{
*i = (*i + 3254) * 3;
}
}
All of the compute_1... functions produce this asm:
compute_1(long (&) [1073741824]): # @compute_1(long (&) [1073741824])
movabs rax, 8589934592
add rax, rdi
mov rcx, rdi
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov rdx, qword ptr [rcx]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi]
add rsi, 9762
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 8]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi + 9762]
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 16]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi + 9762]
mov qword ptr [rdi + 8*rdx], rsi
mov rdx, qword ptr [rcx + 24]
mov rsi, qword ptr [rdi + 8*rdx]
lea rsi, [rsi + 2*rsi]
add rsi, 9762
mov qword ptr [rdi + 8*rdx], rsi
add rcx, 32
cmp rcx, rax
jne .LBB0_1
ret
void compute_10(my_c_arr& input)
{
for (auto i = 0; i != sizeof(input) / sizeof(input[0]); ++i)
{
input[i] = (input[i] + 3254) * 3;
}
}
void compute_11(my_arr& input)
{
for (auto i = 0; i != input.size(); ++i)
{
input[i] = (input[i] + 3254) * 3;
}
}
All of the compute_1*... functions produce this asm code:
compute_10(long (&) [1073741824]): # @compute_10(long (&) [1073741824])
xor eax, eax
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov rcx, qword ptr [rdi + 8*rax]
mov rdx, qword ptr [rdi + 8*rax + 8]
lea rcx, [rcx + 2*rcx]
add rcx, 9762
mov qword ptr [rdi + 8*rax], rcx
lea rcx, [rdx + 2*rdx + 9762]
mov qword ptr [rdi + 8*rax + 8], rcx
mov rcx, qword ptr [rdi + 8*rax + 16]
lea rcx, [rcx + 2*rcx + 9762]
mov qword ptr [rdi + 8*rax + 16], rcx
mov rcx, qword ptr [rdi + 8*rax + 24]
lea rcx, [rcx + 2*rcx]
add rcx, 9762
mov qword ptr [rdi + 8*rax + 24], rcx
add rax, 4
cmp rax, 1073741824
jne .LBB0_1
ret</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>