[llvm-bugs] [Bug 42023] New: [X86] Failure to use HADD for reduction add patterns
via llvm-bugs
llvm-bugs at lists.llvm.org
Sun May 26 05:24:37 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=42023
Bug ID: 42023
Summary: [X86] Failure to use HADD for reduction add patterns
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
llvm-dev at redking.me.uk, spatel+llvm at rotateright.com
https://godbolt.org/z/g1wL8n
; Reproducer: sums the four i32 lanes of %x225 with a two-step
; shuffle+add reduction and returns the total. Marked optsize.
define i32 @hadd32_4(<4 x i32> %x225) optsize {
; Step 1: move lanes 2,3 down to lanes 0,1 (upper lanes undef) and add,
; so lane 0 = x[0]+x[2] and lane 1 = x[1]+x[3].
%x226 = shufflevector <4 x i32> %x225, <4 x i32> undef, <4 x i32> <i32 2, i32
3, i32 undef, i32 undef>
%x227 = add <4 x i32> %x225, %x226
; Step 2: move lane 1 down to lane 0 and add, so lane 0 holds the full sum.
%x228 = shufflevector <4 x i32> %x227, <4 x i32> undef, <4 x i32> <i32 1, i32
undef, i32 undef, i32 undef>
%x229 = add <4 x i32> %x227, %x228
; Only lane 0 carries a meaningful value here; extract and return it.
%x230 = extractelement <4 x i32> %x229, i32 0
ret i32 %x230
}
; Reproducer: sums the eight i16 lanes of %x223 with a three-step
; shuffle+add reduction and returns the total. Marked optsize.
define i16 @hadd16_8(<8 x i16> %x223) optsize {
; Step 1: move lanes 4..7 down to lanes 0..3 (upper lanes undef) and add,
; so lane i = x[i]+x[i+4] for i in 0..3.
%x224 = shufflevector <8 x i16> %x223, <8 x i16> undef, <8 x i32> <i32 4, i32
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
%x225 = add <8 x i16> %x223, %x224
; Step 2: move lanes 2,3 down to lanes 0,1 and add, leaving two partial sums
; in lanes 0 and 1.
%x226 = shufflevector <8 x i16> %x225, <8 x i16> undef, <8 x i32> <i32 2, i32
3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x227 = add <8 x i16> %x225, %x226
; Step 3: move lane 1 down to lane 0 and add, so lane 0 holds the full sum.
%x228 = shufflevector <8 x i16> %x227, <8 x i16> undef, <8 x i32> <i32 1, i32
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x229 = add <8 x i16> %x227, %x228
; Only lane 0 carries a meaningful value here; extract and return it.
%x230 = extractelement <8 x i16> %x229, i32 0
ret i16 %x230
}
Current codegen with llc -mcpu=btver2:
# Generated code for the 4 x i32 reduction (AT&T syntax).
# The first reduction step is a shuffle + vertical add; only the last step
# is a horizontal add.
hadd32_4:
# Step 1: swap the 64-bit halves and add (shuffle + vpaddd) -- presumably the
# step the report wants expressed as a horizontal add as well.
vpshufd $78, %xmm0, %xmm1 # xmm1 = xmm0[2,3,0,1]
vpaddd %xmm1, %xmm0, %xmm0
# Step 2: horizontal add of adjacent dwords finishes the sum in dword 0.
vphaddd %xmm0, %xmm0, %xmm0
# Copy the low dword of xmm0 into eax as the result.
vmovd %xmm0, %eax
retq
# Generated code for the 8 x i16 reduction (AT&T syntax).
# The first two reduction steps are shuffle + vertical add pairs; only the
# last step is a horizontal add.
hadd16_8: # @hadd16_8
# Step 1: swap the 64-bit halves and add, folding words 4..7 onto words 0..3.
vpshufd $78, %xmm0, %xmm1 # xmm1 = xmm0[2,3,0,1]
vpaddw %xmm1, %xmm0, %xmm0
# Step 2: bring dword 1 (words 2,3) down to dword 0 and add, folding onto words 0,1.
vpshufd $229, %xmm0, %xmm1 # xmm1 = xmm0[1,1,2,3]
vpaddw %xmm1, %xmm0, %xmm0
# Step 3: horizontal add of adjacent words finishes the sum in word 0.
vphaddw %xmm0, %xmm0, %xmm0
# Copy the low dword of xmm0 into eax; the i16 sum is its low word.
vmovd %xmm0, %eax
retq
We should always be able to fold integer horizontal-reduction patterns like
these into HADD, and the equivalent fadd patterns when reassociation is permitted.
Pulled out of [Bug #33758] where we are still missing some (optsize) HADD cases
(see also https://godbolt.org/z/yP5CgS ).
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190526/fc9b768c/attachment.html>
More information about the llvm-bugs
mailing list