[LLVMbugs] [Bug 16723] New: Increment only one index to scan and operate on arrays in parallel
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Sun Jul 28 07:40:50 PDT 2013
http://llvm.org/bugs/show_bug.cgi?id=16723
Bug ID: 16723
Summary: Increment only one index to scan and operate on arrays
in parallel
Product: libraries
Version: trunk
Hardware: PC
OS: Windows XP
Status: NEW
Severity: enhancement
Priority: P
Component: Common Code Generator Code
Assignee: unassignedbugs at nondot.org
Reporter: bearophile at mailas.com
CC: llvmbugs at cs.uiuc.edu
Classification: Unclassified
This is a potential enhancement request for the back-end.
This is D code, to be compiled with the ldc2 compiler (in D uint is a 32 bit
unsigned integer):
void foo(in uint[] a,
in uint[] b,
in uint[] c,
in uint[] d,
in uint[] e,
uint[] result) pure nothrow {
foreach (immutable i; 0 .. result.length)
result[i] = cast(uint)(a[i] + b[i] + c[i] + d[i] + e[i]);
}
void main() {}
(Compiled with ldc2 v.0.11.0 based on DMD v2.062 and LLVM 3.3svn)
ldmd2 -O -release -inline -noboundscheck -vectorize-loops -vectorize-slp
-vectorize-slp-aggressive -output-s
For the loop of foo ldc2 gives:
LBB0_2:
movl %eax, (%esp)
movl (%ebx), %eax
addl (%ebp), %eax
addl (%edi), %eax
addl (%esi), %eax
addl (%edx), %eax
movl %eax, (%ecx)
movl (%esp), %eax
addl $4, %edx
addl $4, %esi
addl $4, %edi
addl $4, %ebx
addl $4, %ebp
addl $4, %ecx
decl %eax
jne LBB0_2
This is a similar C99 program (D arrays are not just pointers, they are small
structs of ponter-length, but the loop shouldn't be influenced by this):
#include "stdio.h"
#include "stdint.h"
void foo(size_t n,
const uint32_t *restrict a,
const uint32_t *restrict b,
const uint32_t *restrict c,
const uint32_t *restrict d,
const uint32_t *restrict e,
uint32_t *restrict result) {
for (size_t i = 0; i < n; i++)
result[i] = a[i] + b[i] + c[i] + d[i] + e[i];
}
int main() {
return 0;
}
Compiled with:
gcc -std=c99 -Ofast -S
gcc v.4.8.0 gives this for the loop of foo:
L5:
movl 40(%esp), %ecx
movl (%edi,%edx,4), %eax
addl 0(%ebp,%edx,4), %eax
addl (%esi,%edx,4), %eax
addl (%ebx,%edx,4), %eax
addl (%ecx,%edx,4), %eax
movl 44(%esp), %ecx
movl %eax, (%ecx,%edx,4)
addl $1, %edx
cmpl 20(%esp), %edx
jne L5
While for the loop of foo, Intel compiler V.13.0.1 (without restrict) gives:
...
..B2.4: # Preds ..B2.4 ..B2.3
movl (%rsi,%r10,8), %r11d #12.21
addl (%rdx,%r10,8), %r11d #12.28
movl (%r8,%r10,8), %ebp #12.42
addl (%rcx,%r10,8), %r11d #12.35
addl (%r9,%r10,8), %ebp #12.49
addl %ebp, %r11d #12.42
movl %r11d, (%rbx,%r10,8) #12.9
movl 4(%rsi,%r10,8), %r11d #12.21
addl 4(%rdx,%r10,8), %r11d #12.28
movl 4(%r8,%r10,8), %ebp #12.42
addl 4(%rcx,%r10,8), %r11d #12.35
addl 4(%r9,%r10,8), %ebp #12.49
addl %ebp, %r11d #12.42
movl %r11d, 4(%rbx,%r10,8) #12.9
incq %r10 #11.5
cmpq %rax, %r10 #11.5
jb ..B2.4 # Prob 63% #11.5
...
If I use only the a,b,c,d,result arrays, with the Intel compiler (without
restrict), part of foo becomes:
...
..B3.25: # Preds ..B3.23 ..B3.25
movdqu (%rsi,%rax,4), %xmm2 #19.28
movdqu (%rdx,%rax,4), %xmm0 #19.28
lddqu (%rcx,%rax,4), %xmm1 #19.35
paddd %xmm0, %xmm2 #19.28
paddd %xmm1, %xmm2 #19.35
movdqa %xmm2, (%r8,%rax,4) #19.9
addq $4, %rax #18.5
cmpq %r9, %rax #18.5
jb ..B3.25 # Prob 82% #18.5
...
So in the end in this enhancement I suggest to explore the possibility and
performance of replacing increment instructions like:
addl $4, %edx
addl $4, %esi
addl $4, %edi
addl $4, %ebx
addl $4, %ebp
addl $4, %ecx
with a single counter increment, and then use instructions like this to read
and write arrays memory:
movl (%edi,%edx,4), %eax
(Another potential improvement is to use instructions like paddd that sum four
uint at a time and add after the loop a little extra serial code for situations
where the input array lengths are not multiple of 4. But this is an unrelated
and more complex enhancement).
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20130728/688a382c/attachment.html>
More information about the llvm-bugs
mailing list