<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=http://email.email.llvm.org/c/eJzNWFuTojgU_jX4kmqLBER98KFtt3e2at523q0QomQLCJMEtefX70m4iUqrM7tbSyEk5Nxz8uXEWCYfq28pR6WSccZzJLRr0jj7QLKAh-a8gBaiKo9CdBQmRQboc6E1T5AsS6lMVQjzgYxElQZJUpuXnTghUWijKmaELDTaSQUjojBcIcoY13rq-RvPf62fv4sDL5oPkd_cdZcEomBZlXDkBW9MmwSEeMFv9WilRbFHhxDGNmiXSWrQdkuNUSKuDN9uPbKA-8CZkWqrxQ8OPRx5ZNncwbqWcwg98op0lVtqkEZetWIeeXOiSZRoYzugOQq3BoFfIuHbU37rYwlykTdvBCO4aAWhoc5EEKy4i0KpuNkyCoKDt1qjdYosbgyzlKqewBkGGsj63I7elU4j-xc1ljc1xq1Gx7G29xVN8jkNRLohWFCnsVMNX2LXSC51g-GVKpAV2H725pub2XTkNqHRXsoEMQk55RK6S1maJApS0-YUsGRJiU4wxR_w82brk-_NNrZJgoD0-TlUUKdQm4l14sGH0OXbq82XkbHIjmWy2Pdv8NL5d3155B1u5IX-P6uvC6m9skS593ffktQBgMaJ2DDcpsRDyuAGZVlTEkvwPejpXWhRHdsByw4mxb4P_jTUbkl2DWwbt4lxS0PaRjBO_IhkyPyLaOAr7yARb2OYe36RR37gyrIfUwBVkwLY7hTd57wwFngBt3heGoBVu5a4RdPEAaddg_QDBZC7JVXUcKTkUXcTdqHtIIVN7uIgs8qC7zmk-R2m2R4e9MgzENfy9ciIjlfAl8hh3wU7RBQ4YvixwSp2QQYL3fKv07o12DIMLRrFoUYKHkpxjsZPSyFDKS5A7DkpbYjWDsIaYIM2GLNmlwyAWpAaIuOW6uXleD4-hmh2M3ZItucFh9yAdEmpRiyV2uYYbDxZJpnNmSXk515ogPp6O4YBu5W7LdvtAPqmgmEm_QTS3B0botDntHbzPwPGHgj_l2YOUiGXB_c-LRw4_lCd-dQuWSi5-JChgSd0Wja4a58hepgN-2d8weNsbrWccKvuUTZyxvaENrcHtBvG42zhGduokdOv6zXZ4tF9tL_aRAo2UPN8s8j8RwHrCX2VskRfOE0AuEHKhpcmBSJ8b6ts3Fpc7RG9B9FZvBb3HL_eYpsIfKZhdhajxzWEnQYyokFXsbbvo3OhfsLOPRaUWScQjwgcKwvAg0vSpzbty1CQx0LR6Qhb0V1jdqWjs92F4Wx-onHbya9VJ-SBuicaj0o3MX2c_XsT4-JXu3hvYvBV0D4xYd5KXN7L5kVD0FaJD0_iU3XjcEJsYz5OHAyyoov6fzTn1xXprK-iIfsGxPG04PbdIuITdeu3lCsO5384OsmcI4z7iA_O-HVhK1gK9UiVJbaCtUBhhKmamtbWKu5cpVBzJIeKpDE-ppq3XbnbaTiM2h44FPmIFolj7g5qzekMpNQVLWf4Us75gWLo0CRZBckyWNIJnEhTqVZVwTJOdT6pVLZKjSk1YH2dXXs4H1bxlEko9d6z7NC-Xkol_wIfoCu0rjh4_z4LFvP5JF2RJVn6y5D4frwIuM_pzGc7EuJwHhHGgt0kozHP9Aqs9Qgp-BE5EdAGoye_boFYEZ_AjSPs-5EfTjmZzeIZXixwzDHFcyiXeE5FNrVyplLtJ2rlRMbVXsNgBlWi7gcpnIj3BefOYLAQZjTjqz-rWJZG5DQb1J-QCZAVzd9F3T8-CpzIuRFs4mxdOUP_Bjvv_JY>53877</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Suboptimal code generation in arm64 pointer arithmetic
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          uncleasm
      </td>
    </tr>
</table>

<pre>
    The problem is probably only seen on arm64: the compiler misses the opportunity to use post-indexed ("post-fix") addressing for pointer accesses.

Given
```
#include &lt;cstdint&gt;
using v4 = float __attribute__((vector_size(16)));
v4* sum(v4 *src, v4 &dst, int64_t stride_xm, int64_t stride_xp) {
    auto a = *reinterpret_cast&lt;v4 *&gt;(reinterpret_cast&lt;char *&gt;(src) + stride_xm);
    auto c = *reinterpret_cast&lt;v4 *&gt;(reinterpret_cast&lt;char *&gt;(src) + stride_xp);
    auto b = *src++;
    auto d = *src++;
    dst = (a + c) + (b + d);
    return src;
}
```
We see good code that uses post-indexed addressing (`ldp x, y, [x0], #32`):
```
sum(float __vector(4)*, float __vector(4)&, long, long):                      // @sum(float __vector(4)*, float __vector(4)&, long, long)
        ldr     q0, [x0, x2]
        ldr     q1, [x0, x3]
        ldp     q2, q3, [x0], #32
        fadd    v0.4s, v0.4s, v1.4s
        fadd    v1.4s, v2.4s, v3.4s
        fadd    v0.4s, v0.4s, v1.4s
        str     q0, [x1]
        ret
```

However, when this fragment is used for, say, 3 separate rows:
```
void convolution(v4 *src0, v4 *src1, v4 *src2, int64_t stride_xm, int64_t stride_xp, v4 *dst, int w) {
    do {
        v4 a,b,c;
        src0 = sum(src0, a, stride_xm, stride_xp);
        src1 = sum(src1, b, stride_xm, stride_xp);
        src2 = sum(src2, c, stride_xm, stride_xp);
        *dst++ = (a+b+c);
    } while (--w);
}
```
the code generator chooses to allocate 9 registers (x8-x16) for all these pointers:
```
convolution(float __vector(4)*, float __vector(4)*, float __vector(4)*, long, long, float __vector(4)*, int):      // @convolution(float __vector(4)*, float __vector(4)*, float __vector(4)*, long, long, float __vector(4)*, int)
        mov     x8, xzr  // avoidable
        add     x9, x2, x4   // avoidable
        add     x10, x2, x3  // avoidable
        add     x11, x1, x4  // avoidable
        add     x12, x1, x3  // avoidable
        add     x13, x0, x3  // avoidable
        add     x14, x0, x4  // avoidable
.LBB2_1:                                // =>This Inner Loop Header: Depth=1
        ldr     q0, [x13, x8]
        add     x16, x1, x8  // avoidable
        ldr     q1, [x14, x8]
        add     x15, x0, x8  // avoidable
        ldr     q4, [x12, x8]
        subs    w6, w6, #1
        ldr     q5, [x11, x8]
        ldp     q2, q3, [x15]
        fadd    v0.4s, v0.4s, v1.4s
        add     x15, x2, x8  // avoidable
        fadd    v4.4s, v4.4s, v5.4s
        ldp     q6, q1, [x16]
        fadd    v2.4s, v2.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        fadd    v1.4s, v6.4s, v1.4s
        ldr     q3, [x10, x8]
        ldp     q5, q6, [x15]
        fadd    v1.4s, v4.4s, v1.4s
        ldr     q7, [x9, x8]
        add     x8, x8, #32  // avoidable
        fadd    v0.4s, v0.4s, v1.4s
        fadd    v2.4s, v3.4s, v7.4s
        fadd    v3.4s, v5.4s, v6.4s
        fadd    v2.4s, v2.4s, v3.4s
        fadd    v0.4s, v0.4s, v2.4s
        str     q0, [x5], #16
        b.ne    .LBB2_1
        ret
```

There are some 11 avoidable instructions (marked above). They could be eliminated by using `ldr vector_reg, [base_reg, offset_reg]` together with the post-indexed `ldp vec0, vec1, [base_reg], #32`, as in the code generated for the standalone `sum` function.


</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzNWEuTqjgU_jW4SbVFwkNcuGjb6blTdXfTeytAlEwB4SZB7fvr5yS8RKXVedVYNCTkvHP4zknHIv1cfWQMVVLEOSsQV3ZI4_wTiRJuirESRojKIvTRkesMaaAvuFIsRaKqhNR1yfUn0gLVCiQJpV92_IR4qbSsE81FqdBOSFjhpWYS0SRhSs0dd-O4r839V35gZfsidNurmRKPl0lepww53luidApCHO-XZrVWvNyjgw9rG7TLBdVou6VaSx7Xmm23DongOrBEC7lV_CeDGQ4dsmwvb93IOfgOeUWqLgw1SCOvSiYOebOiSZgqbSagOfS3GoFfPGXbU3HrZQVykbNoBSP40RpCQ62JIFgyG4VKMr1NKAj23hqNxikS3VhOMioHAmsYaCDrczsGV3qNyb-osbqpMe40Wo61ua5o0q9pINItQUStxl41vIntIL3UDYbXskRGYPfaWWxuZtORmYRGeyFSlAjIKZvQfcrSNJWQmiangCVPK3SCLf6EPydYn1wn2Jgh8Twy5OdYQZNCXSY2iQcvfJtvryZfJtZCs5aLcj88wUvr3_XPIe9wIcd3_1l9fUjNL0-lff5wDUkTABiciAnDbUo8pvRuUFYNJTEEP7yB3oYWNbEdsexgU8zz4M59ZT_JfoDN4DYx7mhIN_CmiR-RDJl_EQ185R0k4m0Ms_dv4sgOTBr2YwagqjMA252k-4KV2gAv4BYrKg2war4lZtA0tcBpvkH6iTzI3YpKqhmS4qj6DbvQdhDcJHd5EHltwPcc0twe08wMj2bkGYjr-AZkRMcr4EvFeG6D7SMKHDH8JaOv2AYZLLSff5PWncGGYWzRJA61UvBYinU0floKGUuxAUqek9KFaG0hrAU2GIMx6-SSAVALUoPnzFC9vBzP16cQzRRji2R7VjLIDUiXjCqUZEKZHIPCk-ciMTmzhPzccwVQ35RjWDCl3JZsWwHUTQXjTPoLSHN3bYxCX9Oa4n8GjAMQ_i_NHKVCIQ72eYosOP6UvfnUfLLQcrExQwtP6LRscdfcffQwG3bP-LzH2ezXcsKdukfZyBnbE9psDegKxuNs_hnbpJHz7-s12eLJOjr8ukTyNtDzfBhk_q2E7wl9F6JC3xhNAbhByoZVOgMifK9Utm5FVzVi8CA8i1d0z_HrEttG4CsNwVmMHtfg9xrIhAZVx8o8j9aF5g6VeyooQS8QTwicagvAg0vSp4r2ZSjIY6Hodfid6H4QXOnobbdhONufcNp28ve6E_JA3xNOR6XfmCHO7r2NsfFrXLy3MfgqaF-YsOgkLu9lc9QSdF3iw5v4VN843hAzWEwTe6Os6KP-H-35dUcaDF00ZN-IOJ6XzDw7RHyib_3ImGRw_oejkygYwniI-OiM3zS2PMmgH6nz1HSwBig013Xb05pexZ6rJGqP5NCRtMbHVLFuKnY7BYdRMwOHQhfRMrXM_UGtPZ2BlKajZQm-lHN-oBg7NEtXXrr0lnQGtuVs9Xsdi0rzguajTgp8Av_af3z0_7uQcGAsmObJrJb5KtO6UlAYmlTcw1odzxMBfeF7nh-6x0slxR_gMEy5UjWDUL0HXrRYzLJVjCM3CAIc7NI0xIQGuyAKdsyP_DjwoezMchqzXK3ANYeQkh2RFQFj8HDGV8QlcOEQu27o-nNGgiAOcBThmGGKF9AZsYLyfG7smAu5n8mVNSmu9woWc2gI1bBI4fC7Lxmz6kA-HNkzIVd1meSMqmJmda-s7X8CMKXnug">