<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=http://email.email.llvm.org/c/eJzNWFuTojgU_jX4kmqLBER98KFtt3e2at523q0QomQLCJMEtefX70m4iUqrM7tbSyEk5Nxz8uXEWCYfq28pR6WSccZzJLRr0jj7QLKAh-a8gBaiKo9CdBQmRQboc6E1T5AsS6lMVQjzgYxElQZJUpuXnTghUWijKmaELDTaSQUjojBcIcoY13rq-RvPf62fv4sDL5oPkd_cdZcEomBZlXDkBW9MmwSEeMFv9WilRbFHhxDGNmiXSWrQdkuNUSKuDN9uPbKA-8CZkWqrxQ8OPRx5ZNncwbqWcwg98op0lVtqkEZetWIeeXOiSZRoYzugOQq3BoFfIuHbU37rYwlykTdvBCO4aAWhoc5EEKy4i0KpuNkyCoKDt1qjdYosbgyzlKqewBkGGsj63I7elU4j-xc1ljc1xq1Gx7G29xVN8jkNRLohWFCnsVMNX2LXSC51g-GVKpAV2H725pub2XTkNqHRXsoEMQk55RK6S1maJApS0-YUsGRJiU4wxR_w82brk-_NNrZJgoD0-TlUUKdQm4l14sGH0OXbq82XkbHIjmWy2Pdv8NL5d3155B1u5IX-P6uvC6m9skS593ffktQBgMaJ2DDcpsRDyuAGZVlTEkvwPejpXWhRHdsByw4mxb4P_jTUbkl2DWwbt4lxS0PaRjBO_IhkyPyLaOAr7yARb2OYe36RR37gyrIfUwBVkwLY7hTd57wwFngBt3heGoBVu5a4RdPEAaddg_QDBZC7JVXUcKTkUXcTdqHtIIVN7uIgs8qC7zmk-R2m2R4e9MgzENfy9ciIjlfAl8hh3wU7RBQ4YvixwSp2QQYL3fKv07o12DIMLRrFoUYKHkpxjsZPSyFDKS5A7DkpbYjWDsIaYIM2GLNmlwyAWpAaIuOW6uXleD4-hmh2M3ZItucFh9yAdEmpRiyV2uYYbDxZJpnNmSXk515ogPp6O4YBu5W7LdvtAPqmgmEm_QTS3B0botDntHbzPwPGHgj_l2YOUiGXB_c-LRw4_lCd-dQuWSi5-JChgSd0Wja4a58hepgN-2d8weNsbrWccKvuUTZyxvaENrcHtBvG42zhGduokdOv6zXZ4tF9tL_aRAo2UPN8s8j8RwHrCX2VskRfOE0AuEHKhpcmBSJ8b6ts3Fpc7RG9B9FZvBb3HL_eYpsIfKZhdhajxzWEnQYyokFXsbbvo3OhfsLOPRaUWScQjwgcKwvAg0vSpzbty1CQx0LR6Qhb0V1jdqWjs92F4Wx-onHbya9VJ-SBuicaj0o3MX2c_XsT4-JXu3hvYvBV0D4xYd5KXN7L5kVD0FaJD0_iU3XjcEJsYz5OHAyyoov6fzTn1xXprK-iIfsGxPG04PbdIuITdeu3lCsO5384OsmcI4z7iA_O-HVhK1gK9UiVJbaCtUBhhKmamtbWKu5cpVBzJIeKpDE-ppq3XbnbaTiM2h44FPmIFolj7g5qzekMpNQVLWf4Us75gWLo0CRZBckyWNIJnEhTqVZVwTJOdT6pVLZKjSk1YH2dXXs4H1bxlEko9d6z7NC-Xkol_wIfoCu0rjh4_z4LFvP5JF2RJVn6y5D4frwIuM_pzGc7EuJwHhHGgt0kozHP9Aqs9Qgp-BE5EdAGoye_boFYEZ_AjSPs-5EfTjmZzeIZXixwzDHFcyiXeE5FNrVyplLtJ2rlRMbVXsNgBlWi7gcpnIj3BefOYLAQZjTjqz-rWJZG5DQb1J-QCZAVzd9F3T8-CpzIuRFs4mxdOUP_Bjvv_JY>53877</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Suboptimal code generation in arm64 pointer arithmetic
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
uncleasm
</td>
</tr>
</table>
<pre>
The problem is probably only seen on arm64: a missed opportunity to use post-indexed (post-fix) addressing for pointer accesses.
Given
```
#include <cstdint>
using v4 = float __attribute__((vector_size(16)));
// Loads four v4 vectors relative to src and stores their sum through dst.
// stride_xm and stride_xp are applied to src as byte offsets (the pointer is
// cast to char* before the addition).
// Returns src advanced past the two vectors that were loaded consecutively.
v4* sum(v4 *src, v4 &dst, int64_t stride_xm, int64_t stride_xp) {
    // Two loads at src + stride_xm bytes and src + stride_xp bytes; src itself is not modified here.
    auto a = *reinterpret_cast<v4 *>(reinterpret_cast<char *>(src) + stride_xm);
    auto c = *reinterpret_cast<v4 *>(reinterpret_cast<char *>(src) + stride_xp);
    // Two consecutive loads starting at src, each post-incrementing src by one v4.
    auto b = *src++;
    auto d = *src++;
    dst = (a + c) + (b + d);
    return src;
}
```
We see good code with post-fix addressing `ldp x,y,[x0],#32`:
```
sum(float __vector(4)*, float __vector(4)&, long, long): // @sum(float __vector(4)*, float __vector(4)&, long, long)
ldr q0, [x0, x2]
ldr q1, [x0, x3]
ldp q2, q3, [x0], #32
fadd v0.4s, v0.4s, v1.4s
fadd v1.4s, v2.4s, v3.4s
fadd v0.4s, v0.4s, v1.4s
str q0, [x1]
ret
```
However, when this fragment is used for (say) 3 separate rows
```
// Applies sum() to three row pointers per iteration and writes the combined
// result to consecutive elements of dst.
// The loop is a do/while: the body runs before w is tested, and iteration
// stops when --w evaluates to zero.
void convolution(v4 *src0, v4 *src1, v4 *src2, int64_t stride_xm, int64_t stride_xp, v4 *dst, int w) {
    do {
        v4 a,b,c;
        // Each call stores one row's sum in a/b/c and returns the advanced row pointer.
        src0 = sum(src0, a, stride_xm, stride_xp);
        src1 = sum(src1, b, stride_xm, stride_xp);
        src2 = sum(src2, c, stride_xm, stride_xp);
        // Store the sum of the three row results and advance dst by one v4.
        *dst++ = (a+b+c);
    } while (--w);
}
```
The code generator has chosen to allocate 9 registers for all these pointers:
```
convolution(float __vector(4)*, float __vector(4)*, float __vector(4)*, long, long, float __vector(4)*, int): // @convolution(float __vector(4)*, float __vector(4)*, float __vector(4)*, long, long, float __vector(4)*, int)
mov x8, xzr // avoidable
add x9, x2, x4 // avoidable
add x10, x2, x3 // avoidable
add x11, x1, x4 // avoidable
add x12, x1, x3 // avoidable
add x13, x0, x3 // avoidable
add x14, x0, x4 // avoidable
.LBB2_1: // =>This Inner Loop Header: Depth=1
ldr q0, [x13, x8]
add x16, x1, x8 // avoidable
ldr q1, [x14, x8]
add x15, x0, x8 // avoidable
ldr q4, [x12, x8]
subs w6, w6, #1
ldr q5, [x11, x8]
ldp q2, q3, [x15]
fadd v0.4s, v0.4s, v1.4s
add x15, x2, x8 // avoidable
fadd v4.4s, v4.4s, v5.4s
ldp q6, q1, [x16]
fadd v2.4s, v2.4s, v3.4s
fadd v0.4s, v0.4s, v2.4s
fadd v1.4s, v6.4s, v1.4s
ldr q3, [x10, x8]
ldp q5, q6, [x15]
fadd v1.4s, v4.4s, v1.4s
ldr q7, [x9, x8]
add x8, x8, #32 // avoidable
fadd v0.4s, v0.4s, v1.4s
fadd v2.4s, v3.4s, v7.4s
fadd v3.4s, v5.4s, v6.4s
fadd v2.4s, v2.4s, v3.4s
fadd v0.4s, v0.4s, v2.4s
str q0, [x5], #16
b.ne .LBB2_1
ret
```
There are some 11 avoidable instructions, which could be eliminated by using `ldr vector_reg, [base_reg, offset_reg]` and the post-fix `ldp vec0, vec1, [base_reg], #32`.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzNWEuTqjgU_jW4SbVFwkNcuGjb6blTdXfTeytAlEwB4SZB7fvr5yS8RKXVedVYNCTkvHP4zknHIv1cfWQMVVLEOSsQV3ZI4_wTiRJuirESRojKIvTRkesMaaAvuFIsRaKqhNR1yfUn0gLVCiQJpV92_IR4qbSsE81FqdBOSFjhpWYS0SRhSs0dd-O4r839V35gZfsidNurmRKPl0lepww53luidApCHO-XZrVWvNyjgw9rG7TLBdVou6VaSx7Xmm23DongOrBEC7lV_CeDGQ4dsmwvb93IOfgOeUWqLgw1SCOvSiYOebOiSZgqbSagOfS3GoFfPGXbU3HrZQVykbNoBSP40RpCQ62JIFgyG4VKMr1NKAj23hqNxikS3VhOMioHAmsYaCDrczsGV3qNyb-osbqpMe40Wo61ua5o0q9pINItQUStxl41vIntIL3UDYbXskRGYPfaWWxuZtORmYRGeyFSlAjIKZvQfcrSNJWQmiangCVPK3SCLf6EPydYn1wn2Jgh8Twy5OdYQZNCXSY2iQcvfJtvryZfJtZCs5aLcj88wUvr3_XPIe9wIcd3_1l9fUjNL0-lff5wDUkTABiciAnDbUo8pvRuUFYNJTEEP7yB3oYWNbEdsexgU8zz4M59ZT_JfoDN4DYx7mhIN_CmiR-RDJl_EQ185R0k4m0Ms_dv4sgOTBr2YwagqjMA252k-4KV2gAv4BYrKg2war4lZtA0tcBpvkH6iTzI3YpKqhmS4qj6DbvQdhDcJHd5EHltwPcc0twe08wMj2bkGYjr-AZkRMcr4EvFeG6D7SMKHDH8JaOv2AYZLLSff5PWncGGYWzRJA61UvBYinU0floKGUuxAUqek9KFaG0hrAU2GIMx6-SSAVALUoPnzFC9vBzP16cQzRRji2R7VjLIDUiXjCqUZEKZHIPCk-ciMTmzhPzccwVQ35RjWDCl3JZsWwHUTQXjTPoLSHN3bYxCX9Oa4n8GjAMQ_i_NHKVCIQ72eYosOP6UvfnUfLLQcrExQwtP6LRscdfcffQwG3bP-LzH2ezXcsKdukfZyBnbE9psDegKxuNs_hnbpJHz7-s12eLJOjr8ukTyNtDzfBhk_q2E7wl9F6JC3xhNAbhByoZVOgMifK9Utm5FVzVi8CA8i1d0z_HrEttG4CsNwVmMHtfg9xrIhAZVx8o8j9aF5g6VeyooQS8QTwicagvAg0vSp4r2ZSjIY6Hodfid6H4QXOnobbdhONufcNp28ve6E_JA3xNOR6XfmCHO7r2NsfFrXLy3MfgqaF-YsOgkLu9lc9QSdF3iw5v4VN843hAzWEwTe6Os6KP-H-35dUcaDF00ZN-IOJ6XzDw7RHyib_3ImGRw_oejkygYwniI-OiM3zS2PMmgH6nz1HSwBig013Xb05pexZ6rJGqP5NCRtMbHVLFuKnY7BYdRMwOHQhfRMrXM_UGtPZ2BlKajZQm-lHN-oBg7NEtXXrr0lnQGtuVs9Xsdi0rzguajTgp8Av_af3z0_7uQcGAsmObJrJb5KtO6UlAYmlTcw1odzxMBfeF7nh-6x0slxR_gMEy5UjWDUL0HXrRYzLJVjCM3CAIc7NI0xIQGuyAKdsyP_DjwoezMchqzXK3ANYeQkh2RFQFj8HDGV8QlcOEQu27o-nNGgiAOcBThmGGKF9AZsYLyfG7smAu5n8mVNSmu9woWc2gI1bBI4fC7Lxmz6kA-HNkzIVd1meSMqmJmda-s7X8CMKXnug">