[llvm-bugs] [Bug 33740] New: [LLVM][X86] X86ISelLowering ends with a sub-optimal instruction sequence for shuffle pattern (VPUNPCKLWD) on AVX2 and above.
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Jul 11 00:48:24 PDT 2017
https://bugs.llvm.org/show_bug.cgi?id=33740
Bug ID: 33740
Summary: [LLVM][X86] X86ISelLowering ends with a sub-optimal
instruction sequence for shuffle pattern (VPUNPCKLWD) on AVX2
and above.
Product: new-bugs
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: michael.zuckerman at intel.com
CC: llvm-bugs at lists.llvm.org
The following shuffle is lowered to a sub-optimal instruction sequence even
though a better lowering is available.
Consider the following LLVM IR (.ll) sequence.
1 test.ll
X
define void @interleaved_store(<32 x i8> %x1, <32 x i8> %x2,<32 x i8>* %p) {
%v1 = shufflevector <32 x i8> %x1, <32 x i8> %x2, <32 x i32> <i32 0,i32 1,i32
16,i32 17,i32 2,i32 3,i32 18,i32 19,i32 4,i32 5,i32 20,i32 21,i32 6,i32 7,i32
22,i32 23,i32 8,i32 9,i32 24,i32 25,i32 10,i32 11,i32 26,i32 27,i32 12,i32
13,i32 28,i32 29,i32 14,i32 15,i32 30,i32 31>
store <32 x i8> %v1, <32 x i8>* %p
ret void
}
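With AVX, the .ll file above is lowered to the optimal instructions
**vpunpckhwd**/**vpunpcklwd** (case1), whereas with AVX2 and above the same .ll
file is lowered to a sub-optimal sequence (vperm2i128, two vpshufb and a
vpblendvb, each of the last three needing a constant-pool operand), as shown
in case2.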
******************************************case1*******************************************************
bash-4.2$ llc -mtriple=x86_64-pc-linux -mattr=+avx < test.ll
.text
.file "<stdin>"
.globl interleaved_store # -- Begin function interleaved_store
.p2align 4, 0x90
.type interleaved_store,@function
interleaved_store: # @interleaved_store
.cfi_startproc
# BB#0:
vextractf128 $1, %ymm0, %xmm1
vpunpckhwd %xmm1, %xmm0, %xmm2 # xmm2 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
vpunpcklwd %xmm1, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
vinsertf128 $1, %xmm2, %ymm0, %ymm0
vmovaps %ymm0, (%rdi)
vzeroupper
retq
.Lfunc_end0:
.size interleaved_store, .Lfunc_end0-interleaved_store
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
*******************************************************************************************************
****************************************case2********************************************************
bash-4.2$ llc -mtriple=x86_64-pc-linux -mattr=+avx2 < test.ll
.text
.file "<stdin>"
.section .rodata.cst32,"aM",@progbits,32
.p2align 5 # -- Begin function interleaved_store
.LCPI0_0:
.byte 8 # 0x8
.byte 9 # 0x9
.byte 0 # 0x0
.byte 1 # 0x1
.byte 10 # 0xa
.byte 11 # 0xb
.byte 2 # 0x2
.byte 3 # 0x3
.byte 12 # 0xc
.byte 13 # 0xd
.byte 4 # 0x4
.byte 5 # 0x5
.byte 14 # 0xe
.byte 15 # 0xf
.byte 6 # 0x6
.byte 7 # 0x7
.byte 24 # 0x18
.byte 25 # 0x19
.byte 16 # 0x10
.byte 17 # 0x11
.byte 26 # 0x1a
.byte 27 # 0x1b
.byte 18 # 0x12
.byte 19 # 0x13
.byte 28 # 0x1c
.byte 29 # 0x1d
.byte 20 # 0x14
.byte 21 # 0x15
.byte 30 # 0x1e
.byte 31 # 0x1f
.byte 22 # 0x16
.byte 23 # 0x17
.LCPI0_1:
.byte 0 # 0x0
.byte 1 # 0x1
.byte 8 # 0x8
.byte 9 # 0x9
.byte 2 # 0x2
.byte 3 # 0x3
.byte 10 # 0xa
.byte 11 # 0xb
.byte 4 # 0x4
.byte 5 # 0x5
.byte 12 # 0xc
.byte 13 # 0xd
.byte 6 # 0x6
.byte 7 # 0x7
.byte 14 # 0xe
.byte 15 # 0xf
.byte 16 # 0x10
.byte 17 # 0x11
.byte 24 # 0x18
.byte 25 # 0x19
.byte 18 # 0x12
.byte 19 # 0x13
.byte 26 # 0x1a
.byte 27 # 0x1b
.byte 20 # 0x14
.byte 21 # 0x15
.byte 28 # 0x1c
.byte 29 # 0x1d
.byte 22 # 0x16
.byte 23 # 0x17
.byte 30 # 0x1e
.byte 31 # 0x1f
.LCPI0_2:
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.text
.globl interleaved_store
.p2align 4, 0x90
.type interleaved_store,@function
interleaved_store: # @interleaved_store
.cfi_startproc
# BB#0:
vperm2i128 $35, %ymm0, %ymm0, %ymm1 # ymm1 = ymm0[2,3,0,1]
vpshufb .LCPI0_0(%rip), %ymm1, %ymm1 # ymm1 = ymm1[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
vpshufb .LCPI0_1(%rip), %ymm0, %ymm0 # ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
vmovdqa .LCPI0_2(%rip), %ymm2 # ymm2 = [255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
vmovdqa %ymm0, (%rdi)
vzeroupper
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170711/f8b59e50/attachment-0001.html>
More information about the llvm-bugs
mailing list