<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - [SLP] Missed vectorization - failed to move load with loop-invariant address"
href="https://bugs.llvm.org/show_bug.cgi?id=47889">47889</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>[SLP] Missed vectorization - failed to move load with loop-invariant address
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Scalar Optimizations
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>david.bolvansky@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>#include &lt;stdarg.h&gt;
#include &lt;stdio.h&gt;
#include &lt;stdlib.h&gt;
#define N 16
struct osmesa_context {
unsigned int clearpixel;
void *buffer;
};
struct osmesa_context *osmesa;
int run (void *p)
{
unsigned int i, n, *ptr;
n = N;
ptr = (unsigned int *) osmesa->buffer;
for (i = 0; i &lt; n; i++) {
*ptr++ = osmesa->clearpixel;
}
return 0;
}
ICC and GCC can vectorize this loop.
ICC generates:
run:
mov rax, QWORD PTR osmesa[rip] #20.26
mov rdx, QWORD PTR [8+rax] #20.26
mov rcx, rdx #22.14
sub rcx, rax #22.14
cmp rcx, 16 #21.3
jge ..B1.3 # Prob 50% #21.3
neg rcx #22.6
cmp rcx, 64 #21.3
jl ..B1.4 # Prob 50% #21.3
..B1.3: # Preds ..B1.1 ..B1.2
movd xmm0, DWORD PTR [rax] #22.14
pshufd xmm1, xmm0, 0 #22.14
movdqu XMMWORD PTR [rdx], xmm1 #22.6
movdqu XMMWORD PTR [16+rdx], xmm1 #22.6
movdqu XMMWORD PTR [32+rdx], xmm1 #22.6
movdqu XMMWORD PTR [48+rdx], xmm1 #22.6
jmp ..B1.5 # Prob 100% #22.6
..B1.4: # Preds ..B1.2
mov ecx, DWORD PTR [rax] #22.14
mov DWORD PTR [rdx], ecx #22.6
mov esi, DWORD PTR [rax] #22.14
mov DWORD PTR [4+rdx], esi #22.6
mov edi, DWORD PTR [rax] #22.14
mov DWORD PTR [8+rdx], edi #22.6
mov r8d, DWORD PTR [rax] #22.14
mov DWORD PTR [12+rdx], r8d #22.6
mov r9d, DWORD PTR [rax] #22.14
mov DWORD PTR [16+rdx], r9d #22.6
mov r10d, DWORD PTR [rax] #22.14
mov DWORD PTR [20+rdx], r10d #22.6
mov r11d, DWORD PTR [rax] #22.14
mov DWORD PTR [24+rdx], r11d #22.6
mov ecx, DWORD PTR [rax] #22.14
mov DWORD PTR [28+rdx], ecx #22.6
mov esi, DWORD PTR [rax] #22.14
mov DWORD PTR [32+rdx], esi #22.6
mov edi, DWORD PTR [rax] #22.14
mov DWORD PTR [36+rdx], edi #22.6
mov r8d, DWORD PTR [rax] #22.14
mov DWORD PTR [40+rdx], r8d #22.6
mov r9d, DWORD PTR [rax] #22.14
mov DWORD PTR [44+rdx], r9d #22.6
mov r10d, DWORD PTR [rax] #22.14
mov DWORD PTR [48+rdx], r10d #22.6
mov r11d, DWORD PTR [rax] #22.14
mov DWORD PTR [52+rdx], r11d #22.6
mov ecx, DWORD PTR [rax] #22.14
mov DWORD PTR [56+rdx], ecx #22.6
mov eax, DWORD PTR [rax] #22.14
mov DWORD PTR [60+rdx], eax #22.6
..B1.5: # Preds ..B1.4 ..B1.3
xor eax, eax #25.10
ret #25.10
Codegen: <a href="https://godbolt.org/z/5f6YnY">https://godbolt.org/z/5f6YnY</a></pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>