[llvm-bugs] [Bug 37427] New: [X86] [AVX2] Suboptimal code for vector select fetching cond from bool[] array
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri May 11 14:45:54 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=37427
Bug ID: 37427
Summary: [X86] [AVX2] Suboptimal code for vector select
fetching cond from bool[] array
Product: libraries
Version: 6.0
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: fabiang at radgametools.com
CC: llvm-bugs at lists.llvm.org
// Reproducer: for each i in [0, count), shifts arr[i] left in place by rot1
// when control[i] is true and by rot0 otherwise.
// NOTE(review): the shift count is not range-checked here; presumably callers
// keep rot0/rot1 within [0, bit width of unsigned int) -- confirm.
void variable_shift_loop(unsigned int *arr, const bool *control, int count, int
rot0, int rot1)
{
for (int i = 0; i < count; ++i)
{
// Per-element select between the two loop-invariant shift counts.
int rot = control[i] ? rot1 : rot0;
arr[i] = (arr[i] << rot);
}
}
Clang 6.0 targeting x86-64, "-O2 -mavx2".
The inner loop does:
# Zero %xmm2; it is the comparison operand for the vpcmpeqw instructions below.
vpxor %xmm2, %xmm2, %xmm2
.LBB0_4: # =>This Inner Loop Header: Depth=1
# Load 4 x 8 bytes from (%rsi,%rcx) and zero-extend each byte to a 16-bit word.
vpmovzxbw (%rsi,%rcx), %xmm3 # xmm3 =
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
vpmovzxbw 8(%rsi,%rcx), %xmm4 # xmm4 =
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
vpmovzxbw 16(%rsi,%rcx), %xmm5 # xmm5 =
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
vpmovzxbw 24(%rsi,%rcx), %xmm6 # xmm6 =
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
# For each group of 8 lanes: compare the words against zero (all-ones where the
# loaded byte was 0), zero-extend the word results to 32-bit lanes, then shift
# left by 24 so that the compare result reaches the top bit of each 32-bit lane.
vpcmpeqw %xmm2, %xmm3, %xmm3
vpmovzxwd %xmm3, %ymm3 # ymm3 =
xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
vpslld $24, %ymm3, %ymm3
vpcmpeqw %xmm2, %xmm4, %xmm4
vpmovzxwd %xmm4, %ymm4 # ymm4 =
xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero
vpslld $24, %ymm4, %ymm4
vpcmpeqw %xmm2, %xmm5, %xmm5
vpmovzxwd %xmm5, %ymm5 # ymm5 =
xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero
vpslld $24, %ymm5, %ymm5
vpcmpeqw %xmm2, %xmm6, %xmm6
vpmovzxwd %xmm6, %ymm6 # ymm6 =
xmm6[0],zero,xmm6[1],zero,xmm6[2],zero,xmm6[3],zero,xmm6[4],zero,xmm6[5],zero,xmm6[6],zero,xmm6[7],zero
vpslld $24, %ymm6, %ymm6
# Per-lane select between %ymm0 and %ymm1 using the masks built above.
# NOTE(review): %ymm0/%ymm1 are set up outside this listing; presumably they
# hold the broadcast rot0/rot1 shift counts -- confirm against the full output.
vblendvps %ymm3, %ymm0, %ymm1, %ymm3
vblendvps %ymm4, %ymm0, %ymm1, %ymm4
vblendvps %ymm5, %ymm0, %ymm1, %ymm5
vblendvps %ymm6, %ymm0, %ymm1, %ymm6
# Load 4 x 8 dwords from (%rdi,%rcx,4) and shift each lane left by its selected count.
vmovdqu (%rdi,%rcx,4), %ymm7
vpsllvd %ymm3, %ymm7, %ymm3
vmovdqu 32(%rdi,%rcx,4), %ymm7
vpsllvd %ymm4, %ymm7, %ymm4
vmovdqu 64(%rdi,%rcx,4), %ymm7
vpsllvd %ymm5, %ymm7, %ymm5
vmovdqu 96(%rdi,%rcx,4), %ymm7
vpsllvd %ymm6, %ymm7, %ymm6
# Store the shifted results back to the same addresses.
vmovdqu %ymm3, (%rdi,%rcx,4)
vmovdqu %ymm4, 32(%rdi,%rcx,4)
vmovdqu %ymm5, 64(%rdi,%rcx,4)
vmovdqu %ymm6, 96(%rdi,%rcx,4)
# Advance the element index by 32 and loop until it equals %rdx.
addq $32, %rcx
cmpq %rcx, %rdx
jne .LBB0_4
The intermediate stage of converting to words doesn't seem to be helping; it
just results in extra work.
This should just do 4x vpmovzxbd for the "control" loads, then 4x vpcmpeqd to
set up the blend masks, I think.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180511/c25944d8/attachment.html>
More information about the llvm-bugs
mailing list