[llvm-bugs] [Bug 33434] New: 256 bit double shuffles not optimal
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Jun 13 03:10:43 PDT 2017
https://bugs.llvm.org/show_bug.cgi?id=33434
Bug ID: 33434
Summary: 256 bit double shuffles not optimal
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: tobias at grosser.es
CC: llvm-bugs at lists.llvm.org
Hi,
I just tried to generate AVX2 code for some 256 bit AVX2 double shuffles, but
despite Chandler's outstanding work on improving X86 shuffles two years ago,
the shuffle sequences do not seem to be optimal (using llc out.ll -o -
-mcpu=x86-64 -mattr=+avx2 on r304555).
; Data layout: little-endian (e), ELF name mangling (m:e), i64 aligned to
; 64 bits, f80 aligned to 128 bits, native integer widths 8/16/32/64, and a
; 128-bit natural stack alignment (S128).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
; test_0: loads one <4 x double> from each of %PA and %PB, forms two
; two-input shuffles of them, and stores the results back to %PA and %PB.
;   %SA = A[0], B[0], A[2], A[3]   (mask <0, 4, 2, 3>)
;   %SB = A[1], B[1], B[2], B[3]   (mask <1, 5, 6, 7>)
; Mask indices 0-3 select from %A, indices 4-7 select from %B.
define void @test_0(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %A = load <4 x double>, <4 x double>* %PA
  %B = load <4 x double>, <4 x double>* %PB
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 1, i32 5, i32 6, i32 7>

  ; Instruction sequence reported for the two shuffles above
  ; (llc -mcpu=x86-64 -mattr=+avx2):
  ; vmovddup %xmm1, %xmm2 # xmm2 = xmm1[0,0]
  ; vblendpd $2, %ymm2, %ymm0, %ymm2 # ymm2 = ymm0[0],ymm2[1],ymm0[2,3]
  ; vpermilpd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0]
  ; vblendpd $1, %ymm0, %ymm1, %ymm0 # ymm0 = ymm0[0],ymm1[1,2,3]

  store <4 x double> %SA, <4 x double>* %PA
  store <4 x double> %SB, <4 x double>* %PB
  ret void
}
; test_1: loads one <4 x double> from each of %PA and %PB, forms two
; two-input shuffles of them, and stores the results back to %PA and %PB.
;   %SA = B[0], B[1], A[0], B[2]   (mask <4, 5, 0, 6>)
;   %SB = A[2], A[3], A[1], B[3]   (mask <2, 3, 1, 7>)
; Mask indices 0-3 select from %A, indices 4-7 select from %B.
define void @test_1(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %A = load <4 x double>, <4 x double>* %PA
  %B = load <4 x double>, <4 x double>* %PB
  %SA = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 4, i32 5, i32 0, i32 6>
  %SB = shufflevector <4 x double> %A, <4 x double> %B, <4 x i32> <i32 2, i32 3, i32 1, i32 7>

  ; Instruction sequence reported for the two shuffles above
  ; (llc -mcpu=x86-64 -mattr=+avx2):
  ; vinsertf128 $1, %xmm0, %ymm0, %ymm2
  ; vpermilpd $2, %ymm1, %ymm3 # ymm3 = ymm1[0,1,2,2]
  ; vblendpd $4, %ymm2, %ymm3, %ymm2 # ymm2 = ymm3[0,1],ymm2[2],ymm3[3]
  ; vpermpd $222, %ymm0, %ymm0 # ymm0 = ymm0[2,3,1,3]
  ; vblendpd $8, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[0,1,2],ymm1[3]

  store <4 x double> %SA, <4 x double>* %PA
  store <4 x double> %SB, <4 x double>* %PB
  ret void
}
; test_2: combines the matching 128-bit halves of the vectors at %PA and %PB.
;   %lo_halves = A[0], A[1], B[0], B[1]   (mask <0, 1, 4, 5>) -> stored to %PA
;   %hi_halves = A[2], A[3], B[2], B[3]   (mask <2, 3, 6, 7>) -> stored to %PB
; Both inputs are loaded before either result is stored.
define void @test_2(<4 x double>* %PA, <4 x double>* %PB) {
entry:
  %vec_a = load <4 x double>, <4 x double>* %PA
  %vec_b = load <4 x double>, <4 x double>* %PB

  %lo_halves = shufflevector <4 x double> %vec_a, <4 x double> %vec_b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  %hi_halves = shufflevector <4 x double> %vec_a, <4 x double> %vec_b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>

  ; Instruction sequence reported for the two shuffles above
  ; (llc -mcpu=x86-64 -mattr=+avx2):
  ; vinsertf128 $1, %xmm1, %ymm0, %ymm2
  ; vperm2f128 $49, %ymm1, %ymm0, %ymm0 # ymm0 = ymm0[2,3],ymm1[2,3]

  store <4 x double> %lo_halves, <4 x double>* %PA
  store <4 x double> %hi_halves, <4 x double>* %PB
  ret void
}
Am I missing something or could these really be translated to at most two
vblendpd instructions?
Best,
Tobias
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170613/335bb472/attachment.html>
More information about the llvm-bugs
mailing list