[llvm-bugs] [Bug 32164] New: Inner product of two vectors gives inefficient code
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Mar 7 02:23:09 PST 2017
https://bugs.llvm.org/show_bug.cgi?id=32164
Bug ID: 32164
Summary: Inner product of two vectors gives inefficient code
Product: new-bugs
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: drraph at gmail.com
CC: llvm-bugs at lists.llvm.org
Consider:
/* Vector types declared with the vector_size attribute (size given in bytes). */
/* 16-byte vector of float; dotf below indexes lanes 0..3 of it. */
typedef float __attribute__( ( vector_size( 16 ) ) ) float32x4_t;
/* 32-byte vector of double; dotd below indexes lanes 0..3 of it. */
typedef double __attribute__( ( vector_size( 32 ) ) ) float64x4_t;
/* Inner product of two float vectors: returns x[0]*y[0] + ... + x[3]*y[3]. */
float dotf(float32x4_t x, float32x4_t y) {
float ret=0;
/* Serial accumulation over the four lanes; the report builds this with -Ofast. */
for (int i=0;i!=4;++i) ret+=x[i]*y[i];
return ret;
}
/* Inner product of two double vectors: returns x[0]*y[0] + ... + x[3]*y[3]. */
double dotd(float64x4_t x, float64x4_t y) {
double ret=0;
/* Same loop shape as dotf, over the four double lanes. */
for (int i=0;i!=4;++i) ret+=x[i]*y[i];
return ret;
}
clang trunk with -Ofast -march=corei7 gives:
# clang output for dotf (Intel syntax). Operands are in xmm0 and xmm1;
# the sum of the four lane products ends up in lane 0 of xmm0.
dotf: # @dotf
# xmm0 = lane-wise products [p0, p1, p2, p3]
mulps xmm0, xmm1
# Reverse the lane order of the products.
shufps xmm0, xmm0, 27 # xmm0 = xmm0[3,2,1,0]
# Work on a copy so xmm0 keeps the reversed products.
movaps xmm1, xmm0
# Copy the high 64 bits of xmm1 over its low 64 bits: xmm1 = [p1, p0, p1, p0]
movhlps xmm1, xmm1 # xmm1 = xmm1[1,1]
# Lanes 0 and 1 of xmm1 now hold the pair sums p1+p3 and p0+p2.
addps xmm1, xmm0
# Horizontal add: lane 0 = (p1+p3) + (p0+p2), the full sum.
haddps xmm1, xmm1
# Move the result into xmm0.
movaps xmm0, xmm1
ret
# clang output for dotd (Intel syntax). Both 32-byte vectors are read from
# the stack above the saved rbp and return address; the sum of the four
# lane products ends up in lane 0 of xmm0.
dotd: # @dotd
# Frame setup: save rbp, then align rsp down to 32 bytes and reserve
# 32 bytes. Nothing in this listing reads or writes the reserved area.
push rbp
mov rbp, rsp
and rsp, -32
sub rsp, 32
# The four 16-byte operands span [rbp+16, rbp+80), i.e. two 32-byte vectors
# (presumably x followed by y -- the listing itself does not name them).
movapd xmm1, xmmword ptr [rbp + 16]
movapd xmm0, xmmword ptr [rbp + 32]
# Each multiply pairs a half with the half 32 bytes further up,
# i.e. the same half of the other vector.
mulpd xmm1, xmmword ptr [rbp + 48]
mulpd xmm0, xmmword ptr [rbp + 64]
# xmm0 = [p0+p2, p1+p3]
addpd xmm0, xmm1
# Swap the two lanes; the following haddpd adds both lanes together,
# so the swap does not change the value it produces.
shufpd xmm0, xmm0, 1 # xmm0 = xmm0[1,0]
# Lane 0 = sum of both lanes, the full sum.
haddpd xmm0, xmm0
# Frame teardown.
mov rsp, rbp
pop rbp
ret
gcc gives more efficient code using the same flags:
# gcc output for dotf (Intel syntax): one multiply and two horizontal adds.
dotf:
# xmm0 = lane-wise products [p0, p1, p2, p3]
mulps xmm0, xmm1
# xmm0 = [p0+p1, p2+p3, p0+p1, p2+p3]
haddps xmm0, xmm0
# Every lane now holds p0+p1+p2+p3; lane 0 is the result.
haddps xmm0, xmm0
ret
# gcc output for dotd (Intel syntax): no frame setup; the operands are
# addressed directly off rsp.
dotd:
# Load one 16-byte half from each of two different 32-byte ranges.
movapd xmm0, XMMWORD PTR [rsp+8]
movapd xmm1, XMMWORD PTR [rsp+56]
# Each multiply pairs a half with the address 32 bytes away,
# i.e. the same half of the other vector ([rsp+8] with [rsp+40],
# [rsp+56] with [rsp+24]).
mulpd xmm0, XMMWORD PTR [rsp+40]
mulpd xmm1, XMMWORD PTR [rsp+24]
# xmm0 = [p0+p2, p1+p3]
addpd xmm0, xmm1
# Lane 0 = sum of both lanes, the full sum.
haddpd xmm0, xmm0
ret
In particular, for "double" clang appears to emit an unnecessary prologue and
epilogue plus an extra shuffle, and for "float" it emits an extra shuffle and
some additional work.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170307/619ecf15/attachment.html>
More information about the llvm-bugs mailing list