[llvm-bugs] [Bug 25959] New: [x86, SSE] suboptimal register allocation for scalar minnum/maxnum
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Dec 28 13:35:03 PST 2015
https://llvm.org/bugs/show_bug.cgi?id=25959
Bug ID: 25959
Summary: [x86, SSE] suboptimal register allocation for scalar minnum/maxnum
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: normal
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: spatel+llvm at rotateright.com
CC: llvm-bugs at lists.llvm.org
Classification: Unclassified
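Follow-on from bug 24475: the scalar codegen could be almost identical to the
vector codegen (just change the 'ps' ops to 'ss' ops), but the register
allocator makes different choices in the scalar case, so the scalar code needs
more register copies (three movaps in the scalar output below versus one in
the vector output).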
$ cat fmax.ll
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
declare float @llvm.maxnum.f32(float, float)
define float @maxnum_f32(float %x, float %y) {
%call = tail call float @llvm.maxnum.f32(float %x, float %y)
ret float %call
}
define <4 x float> @maxnum_v4f32(<4 x float> %x, <4 x float> %y) {
%call = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %call
}
$ ./llc fmax.ll -o -
_maxnum_f32:
movaps %xmm0, %xmm2 <--- could have copied %xmm1 instead
cmpunordss %xmm2, %xmm2
movaps %xmm2, %xmm3 <--- extra move
andps %xmm1, %xmm3
maxss %xmm0, %xmm1
andnps %xmm1, %xmm2
orps %xmm3, %xmm2
movaps %xmm2, %xmm0 <--- extra move
retq
_maxnum_v4f32:
movaps %xmm1, %xmm2
maxps %xmm0, %xmm2
cmpunordps %xmm0, %xmm0
andps %xmm0, %xmm1
andnps %xmm2, %xmm0
orps %xmm1, %xmm0
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20151228/b312cebe/attachment-0001.html>
More information about the llvm-bugs
mailing list