<!doctype html>
<html lang="en">
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Suboptimal x86 vector extend patterns"
href="https://bugs.llvm.org/show_bug.cgi?id=47283">47283</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Suboptimal x86 vector extend patterns
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>david.bolvansky@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, spatel+llvm@rotateright.com
</td>
</tr></table>
<div>
<pre>typedef unsigned char v32qi __attribute__((vector_size (32)));
typedef unsigned long long v4di __attribute__((vector_size (32)));
void
bar_u8_u64 (v4di * dst, v32qi src)
{
unsigned long long tem[4];
for (int i = 0; i &lt; 4; i++)
tem[i] = src[i];
dst[0] = *(v4di *) tem;
}
Clang -O3:
bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32)): # @bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32))
push rbp
mov rbp, rsp
and rsp, -32
sub rsp, 64
movdqa xmm0, xmmword ptr [rbp + 16]
pxor xmm1, xmm1
punpcklbw xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
punpcklwd xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
movdqa xmm2, xmm0
punpckldq xmm2, xmm1 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
movdqa xmmword ptr [rsp + 16], xmm2
punpckhdq xmm0, xmm1 # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
movdqa xmmword ptr [rsp + 32], xmm0
movdqa xmmword ptr [rdi + 16], xmm0
movdqa xmmword ptr [rdi], xmm2
mov rsp, rbp
pop rbp
ret
Dispatch Width: 6
uOps Per Cycle: 1.59
IPC: 1.06
Block RThroughput: 5.0
ICC:
bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32)):
movd xmm2, DWORD PTR [8+rsp] #10.18
pxor xmm0, xmm0 #10.18
movdqa xmm1, xmm2 #10.18
psrldq xmm2, 2 #10.18
punpcklbw xmm1, xmm0 #10.18
punpcklbw xmm2, xmm0 #10.18
punpcklwd xmm1, xmm0 #10.18
punpcklwd xmm2, xmm0 #10.18
punpckldq xmm1, xmm0 #10.18
punpckldq xmm2, xmm0 #10.18
movdqu XMMWORD PTR [rdi], xmm1 #12.3
movdqu XMMWORD PTR [16+rdi], xmm2 #12.3
ret
Dispatch Width: 6
uOps Per Cycle: 2.40
IPC: 1.83
Block RThroughput: 7.0
<a href="https://godbolt.org/z/1xP3j8">https://godbolt.org/z/1xP3j8</a></pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>