<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - [LLVM][X86] X86ISelLowering ends with sub-optimal instruction for shuffle pattern (VPUNPCKLWD) on AVX2 and above."
href="https://bugs.llvm.org/show_bug.cgi?id=33740">33740</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>[LLVM][X86] X86ISelLowering ends with sub-optimal instruction for shuffle pattern (VPUNPCKLWD) on AVX2 and above.
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>michael.zuckerman@intel.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>The following shuffle is lowered to a sub-optimal instruction sequence even
though a better lowering is available.
Consider the following ll sequence.
1 test.ll
X
define void @interleaved_store(&lt;32 x i8&gt; %x1, &lt;32 x i8&gt; %x2,&lt;32 x i8&gt;* %p) {
%v1 = shufflevector &lt;32 x i8&gt; %x1, &lt;32 x i8&gt; %x2, &lt;32 x i32&gt; &lt;i32 0,i32 1,i32
16,i32 17,i32 2,i32 3,i32 18,i32 19,i32 4,i32 5,i32 20,i32 21,i32 6,i32 7,i32
22,i32 23,i32 8,i32 9,i32 24,i32 25,i32 10,i32 11,i32 26,i32 27,i32 12,i32
13,i32 28,i32 29,i32 14,i32 15,i32 30,i32 31&gt;
store &lt;32 x i8&gt; %v1, &lt;32 x i8&gt;* %p
ret void
}
With AVX, the ll file above is lowered to the optimal instructions
**vpunpckhwd**/**vpunpcklwd** (case1), while with AVX2 and above the same ll
file is lowered to a sub-optimal sequence (as shown in case2).
******************************************case1*******************************************************
bash-4.2$ llc -mtriple=x86_64-pc-linux -mattr=+avx &lt; test.ll
.text
.file "&lt;stdin&gt;"
.globl interleaved_store # -- Begin function interleaved_store
.p2align 4, 0x90
.type interleaved_store,@function
interleaved_store: # @interleaved_store
.cfi_startproc
# BB#0:
vextractf128 $1, %ymm0, %xmm1
vpunpckhwd %xmm1, %xmm0, %xmm2 # xmm2 =
xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
vpunpcklwd %xmm1, %xmm0, %xmm0 # xmm0 =
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
vinsertf128 $1, %xmm2, %ymm0, %ymm0
vmovaps %ymm0, (%rdi)
vzeroupper
retq
.Lfunc_end0:
.size interleaved_store, .Lfunc_end0-interleaved_store
.cfi_endproc
# -- End function
.section ".note.GNU-stack","",@progbits
*******************************************************************************************************
****************************************case2********************************************************
bash-4.2$ llc -mtriple=x86_64-pc-linux -mattr=+avx2 &lt; test.ll
.text
.file "&lt;stdin&gt;"
.section .rodata.cst32,"aM",@progbits,32
.p2align 5 # -- Begin function interleaved_store
.LCPI0_0:
.byte 8 # 0x8
.byte 9 # 0x9
.byte 0 # 0x0
.byte 1 # 0x1
.byte 10 # 0xa
.byte 11 # 0xb
.byte 2 # 0x2
.byte 3 # 0x3
.byte 12 # 0xc
.byte 13 # 0xd
.byte 4 # 0x4
.byte 5 # 0x5
.byte 14 # 0xe
.byte 15 # 0xf
.byte 6 # 0x6
.byte 7 # 0x7
.byte 24 # 0x18
.byte 25 # 0x19
.byte 16 # 0x10
.byte 17 # 0x11
.byte 26 # 0x1a
.byte 27 # 0x1b
.byte 18 # 0x12
.byte 19 # 0x13
.byte 28 # 0x1c
.byte 29 # 0x1d
.byte 20 # 0x14
.byte 21 # 0x15
.byte 30 # 0x1e
.byte 31 # 0x1f
.byte 22 # 0x16
.byte 23 # 0x17
.LCPI0_1:
.byte 0 # 0x0
.byte 1 # 0x1
.byte 8 # 0x8
.byte 9 # 0x9
.byte 2 # 0x2
.byte 3 # 0x3
.byte 10 # 0xa
.byte 11 # 0xb
.byte 4 # 0x4
.byte 5 # 0x5
.byte 12 # 0xc
.byte 13 # 0xd
.byte 6 # 0x6
.byte 7 # 0x7
.byte 14 # 0xe
.byte 15 # 0xf
.byte 16 # 0x10
.byte 17 # 0x11
.byte 24 # 0x18
.byte 25 # 0x19
.byte 18 # 0x12
.byte 19 # 0x13
.byte 26 # 0x1a
.byte 27 # 0x1b
.byte 20 # 0x14
.byte 21 # 0x15
.byte 28 # 0x1c
.byte 29 # 0x1d
.byte 22 # 0x16
.byte 23 # 0x17
.byte 30 # 0x1e
.byte 31 # 0x1f
.LCPI0_2:
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.byte 0 # 0x0
.byte 0 # 0x0
.byte 255 # 0xff
.byte 255 # 0xff
.text
.globl interleaved_store
.p2align 4, 0x90
.type interleaved_store,@function
interleaved_store: # @interleaved_store
.cfi_startproc
# BB#0:
vperm2i128 $35, %ymm0, %ymm0, %ymm1 # ymm1 = ymm0[2,3,0,1]
vpshufb .LCPI0_0(%rip), %ymm1, %ymm1 # ymm1 =
ymm1[8,9,0,1,10,11,2,3,12,13,4,5,14,15,6,7,24,25,16,17,26,27,18,19,28,29,20,21,30,31,22,23]
vpshufb .LCPI0_1(%rip), %ymm0, %ymm0 # ymm0 =
ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
vmovdqa .LCPI0_2(%rip), %ymm2 # ymm2 =
[255,255,0,0,255,255,0,0,255,255,0,0,255,255,0,0,0,0,255,255,0,0,255,255,0,0,255,255,0,0,255,255]
vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
vmovdqa %ymm0, (%rdi)
vzeroupper
retq</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>