[llvm-dev] [RFC] Memory region declaration intrinsic
Clement Courbet via llvm-dev
llvm-dev at lists.llvm.org
Thu Dec 23 00:56:18 PST 2021
On Wed, Dec 22, 2021 at 8:43 PM Nuno Lopes <nunoplopes at sapo.pt> wrote:
> The non-technical question is whether this matters at all. Do we expect any
> perf benefit from improving alias analysis for this case?
>
I've seen a number of very hot functions in our code where alias analysis
was at fault because of this.
The most impressive issue was in a compression algorithm, and the code was
essentially:
```
struct Histogram {
Histogram();
int total;
int values[256];
};
Histogram DoIt(const int* image, int size) {
Histogram histogram;
for (int i = 0; i < size; ++i) {
++histogram.values[image[i]];
++histogram.total;
}
return histogram;
}
```
Because alias analysis is unable to tell that `histogram.total` and
`histogram.values[*]` do not alias, the total has to be incremented in memory
one by one, on every iteration, instead of being updated once after the loop.
It was easy to fix by manually moving `histogram.total` outside of the
loop. And this made compression 1% faster overall, so it does actually
matter quite a lot.
Of course one might argue that this was not optimally written, but it was
written like this, and I've seen other cases where it's not as obvious.
This is the generated code right now:
```
0000000000000000 <_Z4DoItPKii>:
0: 48 89 f8 mov %rdi,%rax
3: 85 d2 test %edx,%edx
5: 7e 4d jle 54 <_Z4DoItPKii+0x54>
7: 41 89 d0 mov %edx,%r8d
a: 83 fa 01 cmp $0x1,%edx
d: 75 04 jne 13 <_Z4DoItPKii+0x13>
f: 31 d2 xor %edx,%edx
11: eb 2f jmp 42 <_Z4DoItPKii+0x42>
13: 44 89 c7 mov %r8d,%edi
16: 83 e7 fe and $0xfffffffe,%edi
19: 31 d2 xor %edx,%edx
1b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) ; loop (unrolled 2 times)
20: 48 63 0c 96 movslq (%rsi,%rdx,4),%rcx ; image[i]
24: 83 44 88 04 01 addl $0x1,0x4(%rax,%rcx,4) ; ++histogram.values[image[i]]
29: 83 00 01 addl $0x1,(%rax) ; ++histogram.total
2c: 48 63 4c 96 04 movslq 0x4(%rsi,%rdx,4),%rcx
31: 83 44 88 04 01 addl $0x1,0x4(%rax,%rcx,4)
36: 83 00 01 addl $0x1,(%rax)
39: 48 83 c2 02 add $0x2,%rdx
3d: 48 39 d7 cmp %rdx,%rdi
40: 75 de jne 20 <_Z4DoItPKii+0x20> ; end loop
42: 41 f6 c0 01 test $0x1,%r8b
46: 74 0c je 54 <_Z4DoItPKii+0x54>
48: 48 63 0c 96 movslq (%rsi,%rdx,4),%rcx
4c: 83 44 88 04 01 addl $0x1,0x4(%rax,%rcx,4)
51: 83 00 01 addl $0x1,(%rax)
54: c3 ret
```
When I let clang emit range information (note: this was with a previous
iteration of this proposal, but the results are the same), LLVM is able to
hoist the `histogram.total` update out of the loop:
```
0000000000000000 <_Z4DoItPKii>:
0: 48 89 f8 mov %rdi,%rax
3: 85 d2 test %edx,%edx
5: 0f 8e 7d 00 00 00 jle 88 <_Z4DoItPKii+0x88>
b: 41 89 d1 mov %edx,%r9d
e: 49 8d 49 ff lea -0x1(%r9),%rcx
12: 45 89 c8 mov %r9d,%r8d
15: 41 83 e0 03 and $0x3,%r8d
19: 48 83 f9 03 cmp $0x3,%rcx
1d: 73 04 jae 23 <_Z4DoItPKii+0x23>
1f: 31 c9 xor %ecx,%ecx
21: eb 3d jmp 60 <_Z4DoItPKii+0x60>
23: 41 83 e1 fc and $0xfffffffc,%r9d
27: 31 c9 xor %ecx,%ecx
29: 0f 1f 80 00 00 00 00 nopl 0x0(%rax) ; loop (unrolled 4 times)
30: 48 63 3c 8e movslq (%rsi,%rcx,4),%rdi ; image[i]
34: 83 44 b8 04 01 addl $0x1,0x4(%rax,%rdi,4) ; ++histogram.values[image[i]]
39: 48 63 7c 8e 04 movslq 0x4(%rsi,%rcx,4),%rdi
3e: 83 44 b8 04 01 addl $0x1,0x4(%rax,%rdi,4)
43: 48 63 7c 8e 08 movslq 0x8(%rsi,%rcx,4),%rdi
48: 83 44 b8 04 01 addl $0x1,0x4(%rax,%rdi,4)
4d: 48 63 7c 8e 0c movslq 0xc(%rsi,%rcx,4),%rdi
52: 83 44 b8 04 01 addl $0x1,0x4(%rax,%rdi,4)
57: 48 83 c1 04 add $0x4,%rcx
5b: 49 39 c9 cmp %rcx,%r9
5e: 75 d0 jne 30 <_Z4DoItPKii+0x30>
60: 44 8b 08 mov (%rax),%r9d
63: 4d 85 c0 test %r8,%r8
66: 74 1a je 82 <_Z4DoItPKii+0x82>
68: 48 8d 0c 8e lea (%rsi,%rcx,4),%rcx
6c: 31 f6 xor %esi,%esi
6e: 66 90 xchg %ax,%ax
70: 48 63 3c b1 movslq (%rcx,%rsi,4),%rdi
74: 83 44 b8 04 01 addl $0x1,0x4(%rax,%rdi,4)
79: 48 83 c6 01 add $0x1,%rsi
7d: 49 39 f0 cmp %rsi,%r8
80: 75 ee jne 70 <_Z4DoItPKii+0x70>
82: 41 01 d1 add %edx,%r9d
85: 44 89 08 mov %r9d,(%rax) ; histogram.total += iter count
88: c3 ret
```
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20211223/d3cf5357/attachment.html>
More information about the llvm-dev
mailing list