[llvm-bugs] [Bug 39538] New: [SLPVectorizer] Poor vectorization of endian-swap patterns
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri Nov 2 08:56:15 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=39538
Bug ID: 39538
Summary: [SLPVectorizer] Poor vectorization of endian-swap
patterns
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: a.bataev at hotmail.com, lebedev.ri at gmail.com,
llvm-bugs at lists.llvm.org, spatel+llvm at rotateright.com
https://godbolt.org/z/e4gotY
/*
 * Reproducer: stores four 32-bit words to dst, each assembled from four
 * consecutive bytes of src with the lowest-indexed byte of the group placed
 * in the most significant position (a big-endian 32-bit read of each group).
 *
 * src: read at indices 0..15 (0..7 if the #if below is switched to 0).
 * dst: written at indices 0..3 (0..1 if the #if below is switched to 0).
 *
 * NOTE(review): each src[i] is promoted to int before the shift, so when this
 * is built as C a byte >= 0x80 shifted left by 24 does not fit in int; a cast
 * to uint32_t before shifting would avoid that. Left untouched here because
 * this exact expression shape is what the report is about -- confirm before
 * reusing the snippet elsewhere.
 */
void movbe(const uint8_t * __restrict src, uint32_t * __restrict dst) {
/* Word 0 from bytes 0..3. */
dst[0] = (src[0]) << 24
| src[1] << 16
| src[2] << 8
| src[3];
/* Word 1 from bytes 4..7. */
dst[1] = (src[4]) << 24
| src[5] << 16
| src[6] << 8
| src[7];
/* Toggle for the second pair of words; with 1, all four words are produced. */
#if 1
/* Word 2 from bytes 8..11. */
dst[2] = (src[8]) << 24
| src[9] << 16
| src[10] << 8
| src[11];
/* Word 3 from bytes 12..15. */
dst[3] = (src[12]) << 24
| src[13] << 16
| src[14] << 8
| src[15];
#endif
}
The SLPVectorizer attempts to vectorize the above code but, instead of emitting a
single PSHUFB as gcc does, it ends up with a massive ZEXT+SHL+OR monstrosity:
# Compiler output for movbe() (AT&T syntax, VEX-encoded SSE/AVX instructions).
# All loads go through %rdi and the single store goes through %rsi; these are
# presumably src and dst under the SysV x86-64 argument order.
#
# Shape of the code below: four vectors are built, one per byte position k
# within each 4-byte group (source offsets k, k+4, k+8, k+12). Each vector
# ends up with one source byte per 32-bit lane, is shifted left by 24/16/8/0,
# and the four are OR-ed together before one 16-byte store.
_Z5movbePKhPj: # @_Z5movbePKhPj
# Seed xmm0 with byte 0 and xmm1 with byte 1 via zero-extending scalar loads.
movzbl (%rdi), %eax
vmovd %eax, %xmm0
movzbl 1(%rdi), %eax
vmovd %eax, %xmm1
# Byte 2 is loaded here and moved into xmm2 a few instructions later.
movzbl 2(%rdi), %eax
# xmm0: bytes 4, 8, 12 go straight to byte positions 4, 8, 12, i.e. the low
# byte of each 32-bit lane, so this vector needs no widening step.
vpinsrb $4, 4(%rdi), %xmm0, %xmm0
vpinsrb $8, 8(%rdi), %xmm0, %xmm0
vpinsrb $12, 12(%rdi), %xmm0, %xmm0
vmovd %eax, %xmm2
# Byte 3 is loaded here and moved into xmm2 later, once xmm2 is free again.
movzbl 3(%rdi), %eax
# xmm1: bytes 5, 9, 13 are packed into byte positions 1..3 next to byte 1.
vpinsrb $1, 5(%rdi), %xmm1, %xmm1
vpinsrb $2, 9(%rdi), %xmm1, %xmm1
vpinsrb $3, 13(%rdi), %xmm1, %xmm1
# First-byte vector shifted into the top byte of each lane.
vpslld $24, %xmm0, %xmm0
# Second-byte vector: widen the four packed bytes to 32-bit lanes, shift by 16,
# and merge into xmm0.
vpmovzxbd %xmm1, %xmm1 # xmm1 =
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
vpslld $16, %xmm1, %xmm1
vpor %xmm0, %xmm1, %xmm0
# xmm1 is reused for the third-byte vector (offsets 2, 6, 10, 14); xmm2 is
# re-seeded with byte 3 and becomes the fourth-byte vector (3, 7, 11, 15).
vpinsrb $1, 6(%rdi), %xmm2, %xmm1
vmovd %eax, %xmm2
vpinsrb $2, 10(%rdi), %xmm1, %xmm1
vpinsrb $3, 14(%rdi), %xmm1, %xmm1
vpinsrb $1, 7(%rdi), %xmm2, %xmm2
vpinsrb $2, 11(%rdi), %xmm2, %xmm2
vpmovzxbd %xmm1, %xmm1 # xmm1 =
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
vpinsrb $3, 15(%rdi), %xmm2, %xmm2
# Third-byte vector shifted by 8; the fourth-byte vector is widened and used
# unshifted.
vpslld $8, %xmm1, %xmm1
vpmovzxbd %xmm2, %xmm2 # xmm2 =
xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
# Combine the low two byte positions, then merge with the high two in xmm0.
vpor %xmm2, %xmm1, %xmm1
vpor %xmm1, %xmm0, %xmm0
# Single unaligned 16-byte store of all four result words.
vmovdqu %xmm0, (%rsi)
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20181102/9f7c5584/attachment.html>
More information about the llvm-bugs
mailing list