<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/141630>141630</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Converting loop pointer bump to index increases register pressure
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
dzaima
</td>
</tr>
</table>
<pre>
https://godbolt.org/z/oEzY8G6PK
The code:
```c
#include&lt;stddef.h&gt;
__attribute__((noreturn))
void unexpected(size_t i);
void external(int,int,int,int,int);
void foo(int* start, size_t n, int arg1, int arg2, int arg3, int arg4) {
int* curr = start;
int* end = start + n;
while (curr < end) {
int val = *curr;
if (__builtin_expect(val < 0, 0)) {
unexpected(curr - start);
}
external(val, arg1, arg2, arg3, arg4);
curr++;
}
}
```
When compiled with `-O3`, this produces the following core loop:
```asm
.LBB0_2:
mov edi, dword ptr [r12 + r13]
test edi, edi
js .LBB0_5
mov esi, r15d
mov edx, r14d
mov ecx, ebp
mov r8d, dword ptr [rsp + 4] ; avoidable!
call external
lea rax, [r12 + r13]
add rax, 4
add r13, 4
cmp rax, rbx
jb .LBB0_2
```
which contains a stack reload. This is a consequence of LLVM replacing the pointer bump with a base pointer plus a (scaled) index, so that storing `curr` takes two registers instead of one. In contrast, gcc produces:
```asm
.L3:
mov r8d, r13d
mov ecx, r12d
mov edx, ebp
mov esi, ebx
call "external"
add r15, 4
cmp r15, r14
jnb .L1
.L4: mov edi, DWORD PTR [r15]
test edi, edi
jns .L3
```
which doesn't contain the reload, and also saves two instructions by not having to compute `curr` for the comparison at all; as such, it is two instructions shorter. (LLVM also performs that computation in an inefficient way: the `lea; add; add` could trivially be `add; lea`, but this is unrelated to the main issue.)
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJyMVt9v4ygQ_mvIy6iRjX_EfchD0m7u4Xra1Wp1q3uKMExiugR8gJO2f_0JbDdOmq1Osoztb2YYvvkGw5yTe424JMWaFI8z1vnG2KV4Y_LAZrURr8vG-9aRbEXohtDN3ojaKD83dk_o5o3Qjfny9k_1R_ntT5KsSLL60SBwIzB4xA-kTPqLhxeaSc1VF_AH54XA3bwh2ZfedLtl3ltZdx63W0IrQittLPrOakLvw5WsjkYK6DS-tMg9CkIrJ99w60EGPFv3kaIVvni0milCK6k9oQ-_uV-57YwZPVbgPLPBEIZZdHiW2gOz-3TyTCfP2eQ5J_QeyCLEBwAYovLOWiDZ4xA-u4JRizMKhK5Bn21OjVQIhFZDkIdgfjnLEAqOTMU4hK6C8TlGNNiFINtt3Unlpd72jBJa9V4PkIRlJD3xY_AL4mMCdyNF95fhyeJx8jYpxTHcH975G7kbeRs4uwwWs6frcL1_7ycY7qPG-jJyc2ilQgEn6RsgZXL3NSNlAnv0Dt5Bb8A3MnywCMqYdpDsGIu5A0lW86f1OtnSHhvzOZhjvywhQ87iZKyA1lsgxdqmNJbMphkpphx4dH7iFIYz-Oz6sZ-uuDWXi242LcTNTF56NL-J8ohi3V6BthIfF-DauICcFI9AsjWw0BasVkhoOi0KU-qitmdIIQPL4pyfMcKEiONgmt-C0uwd4od2am7rlymB9ZRAeiWKUyN5A9xoz6R2wIJo-S-wqAyLDLDwkRvt8N8ONUcwO3h6-vsvsNgqxqXeg28QWiO1Rwt1d2h7dbHQRY4zhbEJpRYYs_NN58CzX9HzZMDiXjqP1gXdOR80F_bEIOwymcPPBi0yB3vOobVGdBzdb_SY3dbiUEqbZp8owKb0E_V81MdEeHhBdyw-ofTc2PRm9Yrrwr4XsYdsOgWfdQ3zpzQuMyfZCj722uPPr98f4duP772yiv_TY8_aDdrIbupCGHSa0IUfFRJrPRGHFsCUM-DYEV0sp9TO2457abQDo-Ou0vlQ7HNVYWdsjBRAZqUzOoilfgVtPDTsGLVhQBiQHpgHpuLWeBq0EPUXcguw1MA0SI27neQStYcTew3hwgSkTBSy2KtCjEOZADedEuCtPEqm1CvU0XSwCR5l3OTrzvdboXTQaYuK-XF_RDgEPqRzHQ7_4JEQB67jTfD5QIhrjPVo5zOxzMR9ds9muEwXeZWVeUbzWbNkdYEZr-6Tssz4glX1IueiKMoqRUp5gjO5pAktkoIu0jIvaD4vaVVxpGVaJ6zEckfyBA9MqrlSx0M4jsxijss0T8ssmSlWo3LxWEOpxtO4AhpOOXYZnO7qbu9InijpvDuH8dIrXD4YfUQbCxp-DZeN703f5yA1D5VC997e0Fp0rrM466y6PjtJ33T1nJsDoZsw3zDctdY8x3_vJmbpCN0Myzgu6X8BAAD__-HBy2A">