<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="http://email.email.llvm.org/c/eJzFVl2TmjAU_TX4kqkD4UN84GF3Hf9C7ZMTJAJtICxJUPvrewkfAsbZttuZOo4k4eTck3tPYmKe3KKvGS2RzCgSpKDoxEshSSlRLlBJaUITFHOZoTOvEeMXRMoEZXmaoUIxmVcsPxGZwxxk4RAgAKUaaOG9hpE6l1lBJeAY4hWtO7jl7i28teydZb-0sadhhYp5JfOCMHZD8MMhxCCDCFCAwi_xTVKtxQm6dgOBYL6F39BlWNDiHbpwxYCHIlpylWbrLnz_G9j9V3dVXsrw6gRHCdKKitT0yH_AEifj1zbWpH9b9A-L_jdYMLI2r10ABB-iJEfXK7LcHWogn-yoQojRpFQeIYVd79rmCZjmowc9urXcCd3IxETPdL0-zrwZ-b4t-BYiDwejyLbCBpX3YZPMkWuUeTgYpt7MjB8Ilbw66prrEOpn9d7LrSlUgtZVTWU7dFRO0Ceoi2J-f-gXsG4Is_xXx_J3s7UAWtUlakRWvx_LLtQgoaXdzKRudveZ7Qf2CHxRb3akBEkp4mdgTXMBYsQcrtrsw7Px1mGmRdvrMNYN3DYewRhpzAB2ggHdthZwqMaM2xm4XQM3gGfczsjtGrh_Vrh9ThR4Y8s24CGbC_zYsrC7mRbesE_PJGcf7tTFTuzrONvq7Sy9p5tEVWN1XXdwzHTUcbZLXz4p9sMhuzjgwMOG0-2JIwre5DpTuC8WpAcEmjH-HdPKNYPcSaKfMnkT0APV6FL8SZeanDS4FD-41P8jl3q_5VL8v1069EM9kZ7P4m-cy0hJO5sOHG_InrnYhHD-h6FBDep9OHomhoPXbg9eM9KblOsDqD8ndZ4j8YL0ETra3H2wuenAfG5zkxUHm7uftLnpoH-0ufsPbD6_Pa2SyE227pas4E8543WkyhOjRBQrVbMok7KCC-BLZ6AU7ocqXoOBocNYMzy-VDX_Tk9wo9vnQigKptz7Lg6cVRadiZ0ENMA08P3As7cexdT1T559OodJEPsrRmLKRASVszAu6QVpCmhDHVefV5BH2MbYdpzQDnyIv956zhnjjYcxxXGMbcuzaQE7fN3yrHmdrupIU8YqFfCSgfnF_SURIk_htq0Fg0KZS0ajnequ1_ftJdAlby_BdQF3c14ibauVFhdpZb8AnwdUwA">53261</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            Duplicate constants with arm neon mull2
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          uncleasm
      </td>
    </tr>
</table>

<pre>
    When the same constant is needed for both the low and the high multiplications (and possibly for other low/high arithmetic operations),
the constant is suboptimally allocated twice, as an 8-byte and as a 16-byte variant, when the 16-byte variant alone would be enough.

```
// For each of the 16 byte lanes, computes x*X - y*Y in an unsigned 16-bit lane
// (wrapping modulo 2^16) and returns bit 15 of that result, i.e. each output
// byte is 0 or 1.
uint8x16_t compare_ok(uint8x16_t x, uint8x16_t y, uint8x16_t X, uint8x16_t Y) {
    // Low 8 lanes: widening multiply x*X, then widening multiply-subtract y*Y.
    auto xx = vmull_u8(vget_low_u8(x), vget_low_u8(X));
    xx = vmlsl_u8(xx, vget_low_u8(y), vget_low_u8(Y));

    // High 8 lanes: same computation on the upper halves of the inputs.
    auto XX = vmull_u8(vget_high_u8(x), vget_high_u8(X));
    XX = vmlsl_u8(XX, vget_high_u8(y), vget_high_u8(Y));

    // .val[1] of the unzip keeps the odd-indexed bytes of both vectors, which is
    // the upper byte of every 16-bit lane (assuming little-endian lane layout).
    auto top_byte = vuzpq_u8(vreinterpretq_u8_u16(xx), vreinterpretq_u8_u16(XX)).val[1];
    // Logical shift right by 7 leaves only the top bit of each byte.
    return vshrq_n_u8(top_byte, 7);
}
        // optimal usage of registers
        umull   v4.8h, v0.8b, v2.8b
        umull2  v0.8h, v0.16b, v2.16b
        umlsl   v4.8h, v1.8b, v3.8b
        umlsl2  v0.8h, v1.16b, v3.16b
        uzp2    v0.16b, v4.16b, v0.16b
        ushr    v0.16b, v0.16b, #7

// Calls compare_ok with the multipliers X and Y fixed to the immediates 33 and
// 119, each broadcast to all 16 lanes. compare_ok reads both the low and the
// high half of each broadcast vector.
uint8x16_t compare_fail(uint8x16_t x, uint8x16_t y) {
    return compare_ok(x,y, vdupq_n_u8(33), vdupq_n_u8(119));
}
        // same constant is allocated both to 8-byte and 16-byte registers
        movi    v2.8b, #33
        movi    v5.8b, #119
        movi    v3.16b, #33
        movi    v4.16b, #119
        umull   v2.8h, v0.8b, v2.8b
        umull2  v0.8h, v0.16b, v3.16b
        umlsl   v2.8h, v1.8b, v5.8b
        umlsl2  v0.8h, v1.16b, v4.16b
        uzp2    v0.16b, v2.16b, v0.16b
        ushr    v0.16b, v0.16b, #7

// Overload taking the multipliers at run time: lane 0 of coeffs is broadcast to
// all 16 lanes as X and lane 1 as Y, then both are passed to compare_ok, which
// reads the low and the high half of each broadcast vector.
uint8x16_t compare_fail(uint8x16_t x, uint8x16_t y, uint8x8_t coeffs) {
    return compare_ok(x,y, vdupq_lane_u8(coeffs, 0), vdupq_lane_u8(coeffs, 1));
}
        // same constant is allocated both to 8-byte and 16-byte registers
        dup     v3.8b, v2.b[0]
        dup     v4.16b, v2.b[0]
        dup     v5.8b, v2.b[1]
        dup     v2.16b, v2.b[1]
        umull   v3.8h, v0.8b, v3.8b
        umull2  v0.8h, v0.16b, v4.16b
        umlsl   v3.8h, v1.8b, v5.8b
        umlsl2  v0.8h, v1.16b, v2.16b
        uzp2    v0.16b, v3.16b, v0.16b
        ushr    v0.16b, v0.16b, #7
```

</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzFVt3SmyoUfRpzwzQjoIleeNGvme8VTnqV0UjUUxQrYJI-_dniT9SQ-drTzjTjRMDF2ou9FwyJSO_RPzmrkMoZknHJ0FlUUsWVQoVEFWMpS1EiVI4uokFcXFFcpSgvshyVmqui5sU5VgXMQQ4JAAJQZoAOeTewuClUXjIFOI5EzZoe7tB3h4SOe3Dcz13seVipE1Groow5vyP4ExBilBFLUICCT8ldMaMF7_p2C4FgvkO-oOu4oNU3dBWaAw9DrBI6y7d9-OF_5w6P6eqiUsEN704KpJV13LCT-AZLnI3fuliz_n3VP676X2HByNm_9QEQ_GKtBLrdkEMPqIV88pMOIEabMXWCFPa9W5cnYFqOHs1o6NAZ3cTE5cB0uz3PvFv5vq74ViKPR6vIrsIWlY9hm8yJa5J5PFqm3u2MHwhVoj6ZmpsQ-kf9fZDbMKgEa-qGqW7opPFuSFAfxf79OCxg28bc8d-w4x8WawG0birUyrz5fqr6UKOEjna_kLo_PGZ2P9gj8KDB7EjLOGNIXIA1KySIkUu47rIP79bbBrkR7W6DxDRI13gGE2QwIxjvRnTXWsGhGgtuPHJTCzeAF9x44qYW7h816d4zBd7Uci14yOYKP7UcQvfzwlv26SUu-Ic7dbUThzoutno3y-zpNtX1VF1KR8fMRzEO1758UeynQ3Z1wIGHLafbC0eUoi1MpshQLEgPCLRj_Aemk2sH0VmiXzJ5M9AT1eRS8psutTlpdCl5cqn_Sy71fsql5G-7dOwHZiK7XOT_cS6PK9bbdOT4gtyFi20I_DcMDWrQ4MPJMwkcvG538NqR3qxcH0D9JSl-jSQr0mfoZHP6ZHPbgfna5jYrjjanv2lz20H_bHP6B2y-vD1t0oimIQ3jjSoUZ9FB9xfFh1Ekuhbdda4p4ZYpKmQStNENj3Klargefu7tlQFKJ1uwN3Q4b8fXp7oR_7Iz3PfeCyk1A8u--5Ts8CaPErKn2NvjhFLXT9OE0HQfBtj34iTEZ4I3PE4YlxHU1SGkYldkKKANVd4UEXEJcTEO3J3vueE29PCFkL1HCCNJQlzHc1kJu3fb6diKJts0kZGU6EzCRw7Glo-PsZRFBjdpEw744ZqSiybS1ZmzWJYbEzsy2v8DuV0-qQ">