<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/56330>56330</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Poor code generation on two simple nested loops (arm64)
</td>
</tr>
<tr>
<th>Labels</th>
<td>
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
uncleasm
</td>
</tr>
</table>
<pre>
The fragment at https://godbolt.org/z/Tdjv4xq45
shows an image processing algorithm that handles arbitrary image widths (including widths that are not multiples of the given block size).
```
do {
do {
auto d = vld1_u8(src); vst1_u8(dst, vqadd_u8(d,d)); // KERNEL
src += 8; dst += 8;
asm("":"+r"(width)); // Artificial 'barrier' to disable a disaster
width += block_size;
} while (width >= 0);
src += width;
dst += width;
} while (width > block_size);
```
As far as I can tell, GCC handles the inner loop optimally, producing this code:
```
.L2:
ldr d0, [x2], 8
adds x0, x0, x6
uqadd v0.8b, v0.8b, v0.8b
str d0, [x5], 8
bpl .L2
```
Instead, clang rewrites the outer loop condition as a comparison against the constant 0, which adds one `mov` to the inner loop to keep the pre-increment value of the loop counter; this may or may not slow down the loop.
```
.LBB0_2:
ldr d0, [x2], #8
mov x10, x9
adds x9, x10, x6
uqadd v0.8b, v0.8b, v0.8b
str d0, [x5], #8
b.pl .LBB0_2
add x2, x2, x9
add x5, x5, x9
cmp x10, #0
b.gt .LBB0_2
```
Things become absolutely crazy when the `asm("" : "+r"(width));` statement is omitted:
there are forward jumps and backward jumps, precomputations and rematerializations:
```
outer_loop_two_loops(long, int, unsigned char*, long, long, unsigned char*, long): // @outer_loop_two_loops(long, int, unsigned char*, long, long, unsigned char*, long)
add x9, x3, x6, lsl #1
add x10, x4, x6, lsl #1
lsl x8, x6, #1
add x9, x9, #8
add x10, x10, #8
.LBB0_1: // =>This Loop Header: Depth=1
mov x11, x10
mov x12, x9
mov x13, x8
mov x14, x6
.LBB0_2: // Parent Loop BB0_1 Depth=1
mov x15, x2
mov x16, x5
cmn x0, x13
ldr d0, [x15], #8
uqadd v0.8b, v0.8b, v0.8b
str d0, [x16], #8
b.mi .LBB0_4
mov x5, x16
mov x2, x15
b .LBB0_6
.LBB0_4: // in Loop: Header=BB0_2 Depth=2
add x16, x2, x0
add x15, x5, x0
add x17, x0, x14
cmp x17, #0
b.le .LBB0_7
add x16, x16, x13
add x15, x15, x13
add x2, x16, #8
add x5, x15, #8
.LBB0_6: // in Loop: Header=BB0_2 Depth=2
add x14, x14, x6
add x13, x13, x6
add x12, x12, x6
add x11, x11, x6
b .LBB0_2
.LBB0_7: // in Loop: Header=BB0_1 Depth=1
add x2, x16, x12
add x5, x15, x11
subs w1, w1, #1
b.ne .LBB0_1
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy9WN1zozYQ_2vwi6YePoyxH_wQx0mb6U3npnMzfcwISQYlAnGSsJP89V0JSACD05u2xzgIpNV-_rS7JJX0dfdQIpMzdFQ4K1hpUG5Mpb3oxgvv4ZdJmkphllJl8PYGf9_o02n18n0Ve_7B82-au-VAJGVI5_KsES4RL3DGUKUkYVrzMkNYZFJxkxcgDht05kKgHJdUMIRVyo3C6rXddebU5Bp54caRYsVQKQ0qamF4JZhG8ogyfmIlSoUkz0jzN-aF22VfI2_ttz_3SiXykn3zjOx1MdFeuDYSUeRFB3QSNHisN6CGVgT4e9EeoZM27STVxgtv0ek7prSdgXdqCVvaxoXo97s__7j7cikKuALJ3oraWHpg2H-f0E0XIMULQ_uzEYKHvXLDxvnsUvaNMvzICccCZpIUK8UZ7EiQtZJrnFr3uydtYOFCpGPbaeW8_ei8PVTPSw7onHPg1WkCDryzW_xGnz5xz-xG6eFyzwvD5WkhfaV6okbh79__sph6QARAapgQNoa_3t6iApeAPe0OAy9LppCQskKyMgBKIV4tlmlNLJQt0ifFLL-4uAy8KKhq7PKtJC_ev4RefLDPmyEhwEjb8cURtvf1kKa2YIPx5C83qUPf8GFArM1YcjwtOa2EG6368957KAEimCIiMPjAAJC0RVGBn5lzmqxN5zQigRZDMnGSQWXrNVlC5NZ-IU9wtztHnoYZgg3J3XxdFpICchntONalBSiwAwwAUYFfkVRusLlBC3mGM31ukpndspyJ0H7vP_5IlLwwGrkLLHDjS9DEaDsTx61bDf7PQF4qly6bYHaGjlVrNA-dTuGM-g1N7FbjKRpSVH0PgBb-WIvMXGgxiapvOWBDo5QRWUAqSrUUgCM4bkTht1cINmtCCtv66Q-O_g26kgAtxACChrmaxqFkFNwYRt8DDzwVc6XlKNUZK4qe6qKypYuiFJPnjylrYaWsflVtXCUDEsUKYK4gr_I3bDjgHThfK0DudDxaYD6as3QPwHkjZJlZAbx0xaQuNc9KAD3JMdh1Y-c6km6cp9lap7irzf7eyv85cqcR1ByAqMW_Jde2DEXBDH17VFafbbCzbsPmg_IK220L4cnzMhb-juhNP2EE756dvzqfRwcoSwBrjb7YzPUb5ExIXLD_wCpbzg7BXDoJOg1m1ifP68dy4-jZZNW5dZQH_6FZCH2FwwJnyRnlfPK5QXGbZmaW122GGeWWsllu4xF9lqiD-WT4L7MsKDifZgtux8aPqxkLG_uD9cxyE89g5IC0HRvWg3itfiRevHSxsls6DB5c0N_jNlcd2sCEbR8yQ9SvD7NESa-XCVbjSFd9qqkqAs3euyeS69p2Q3Rd326YIwt7zK5kiz6zcbJY_6QorTqvTvQXH1RRZ-9Vqtbs8DpVm6CCKaohbMO-Q5L_wiGz6WYydNaUzyNnTRnmgDp1vdvZmdjcLytLuix7uBwtKmY-_QRZ0F1Et9EWLww3gu2-Smhl3fdzxqAfdg0F9MsIKjZ83BbwzYtKBt130wy7L2OsijVEfruoldiNvtnhK7tOl9CwwIsQp274Bb5gnhiBgn_Pta4Z9AH38TqK_EW-I2RDWQg9FtumlESrOIpWa5yEwTHGxF-tFwKnTOgdpEXIiQu-C_0w9BM_gL9NnCwJPiZR6EcxObJjEjDoPKBD4mJpBdv_HizUzumQ1pmGRcG10R-LWLvegnX84Ss8l2pXl0QwaPsWTt-dU_ZvwXFNTA">