[llvm-bugs] [Bug 47283] New: Suboptimal x86 vector extend patterns
via llvm-bugs
llvm-bugs at lists.llvm.org
Sat Aug 22 05:22:06 PDT 2020
https://bugs.llvm.org/show_bug.cgi?id=47283
Bug ID: 47283
Summary: Suboptimal x86 vector extend patterns
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: david.bolvansky at gmail.com
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
llvm-dev at redking.me.uk, spatel+llvm at rotateright.com
/* 32-byte vector of unsigned char lanes (GCC/Clang vector_size extension). */
typedef unsigned char v32qi __attribute__((vector_size (32)));
/* 32-byte vector of unsigned long long lanes. */
typedef unsigned long long v4di __attribute__((vector_size (32)));
/* Reproducer: widen the first four unsigned bytes of src to unsigned long long
   and store them as one v4di at dst[0].  Lanes 4..31 of src are never read. */
void
bar_u8_u64 (v4di * dst, v32qi src)
{
/* Scalar staging array; it is reinterpreted as a single v4di below. */
unsigned long long tem[4];
/* unsigned char -> unsigned long long conversion, i.e. a zero-extension per lane. */
for (int i = 0; i < 4; i++)
tem[i] = src[i];
/* Read tem back through a v4di pointer cast and store the vector to dst[0]. */
dst[0] = *(v4di *) tem;
}
Clang -O3:
bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32)):
# @bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32))
push rbp
mov rbp, rsp
and rsp, -32
sub rsp, 64
movdqa xmm0, xmmword ptr [rbp + 16]
pxor xmm1, xmm1
punpcklbw xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
punpcklwd xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
movdqa xmm2, xmm0
punpckldq xmm2, xmm1 # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
movdqa xmmword ptr [rsp + 16], xmm2
punpckhdq xmm0, xmm1 # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
movdqa xmmword ptr [rsp + 32], xmm0
movdqa xmmword ptr [rdi + 16], xmm0
movdqa xmmword ptr [rdi], xmm2
mov rsp, rbp
pop rbp
ret
Dispatch Width: 6
uOps Per Cycle: 1.59
IPC: 1.06
Block RThroughput: 5.0
ICC:
# ICC output for the reproducer (Intel syntax): widens four bytes to four
# 64-bit lanes by repeatedly interleaving with a zeroed register.
bar_u8_u64(unsigned long long __vector(4)*, unsigned char __vector(32)):
# Load 4 bytes from [rsp+8] into the low dword of xmm2; the rest of xmm2 is zeroed.
# NOTE(review): presumably these are src[0..3] with src passed in memory on the stack - confirm against the ABI.
movd xmm2, DWORD PTR [8+rsp] #10.18
# xmm0 = 0; it supplies the zero bytes that each unpack below interleaves.
pxor xmm0, xmm0 #10.18
# Split the work into two independent chains: xmm1 keeps bytes 0..1 lowest,
# xmm2 is shifted right by 2 bytes so bytes 2..3 become its lowest bytes.
movdqa xmm1, xmm2 #10.18
psrldq xmm2, 2 #10.18
# Each unpack-low against zero doubles the element width:
# byte -> word, word -> dword, dword -> qword.
punpcklbw xmm1, xmm0 #10.18
punpcklbw xmm2, xmm0 #10.18
punpcklwd xmm1, xmm0 #10.18
punpcklwd xmm2, xmm0 #10.18
punpckldq xmm1, xmm0 #10.18
punpckldq xmm2, xmm0 #10.18
# Unaligned 16-byte stores of the two halves to [rdi] and [rdi+16]
# (rdi presumably holds dst, the first pointer argument - confirm against the ABI).
movdqu XMMWORD PTR [rdi], xmm1 #12.3
movdqu XMMWORD PTR [16+rdi], xmm2 #12.3
ret
Dispatch Width: 6
uOps Per Cycle: 2.40
IPC: 1.83
Block RThroughput: 7.0
https://godbolt.org/z/1xP3j8
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200822/678e13f2/attachment.html>
More information about the llvm-bugs
mailing list