<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=http://email.email.llvm.org/c/eJzFVl2TmjAU_TX4kqkD4UN84GF3Hf9C7ZMTJAJtICxJUPvrewkfAsbZttuZOo4k4eTck3tPYmKe3KKvGS2RzCgSpKDoxEshSSlRLlBJaUITFHOZoTOvEeMXRMoEZXmaoUIxmVcsPxGZwxxk4RAgAKUaaOG9hpE6l1lBJeAY4hWtO7jl7i28teydZb-0sadhhYp5JfOCMHZD8MMhxCCDCFCAwi_xTVKtxQm6dgOBYL6F39BlWNDiHbpwxYCHIlpylWbrLnz_G9j9V3dVXsrw6gRHCdKKitT0yH_AEifj1zbWpH9b9A-L_jdYMLI2r10ABB-iJEfXK7LcHWogn-yoQojRpFQeIYVd79rmCZjmowc9urXcCd3IxETPdL0-zrwZ-b4t-BYiDwejyLbCBpX3YZPMkWuUeTgYpt7MjB8Ilbw66prrEOpn9d7LrSlUgtZVTWU7dFRO0Ceoi2J-f-gXsG4Is_xXx_J3s7UAWtUlakRWvx_LLtQgoaXdzKRudveZ7Qf2CHxRb3akBEkp4mdgTXMBYsQcrtrsw7Px1mGmRdvrMNYN3DYewRhpzAB2ggHdthZwqMaM2xm4XQM3gGfczsjtGrh_Vrh9ThR4Y8s24CGbC_zYsrC7mRbesE_PJGcf7tTFTuzrONvq7Sy9p5tEVWN1XXdwzHTUcbZLXz4p9sMhuzjgwMOG0-2JIwre5DpTuC8WpAcEmjH-HdPKNYPcSaKfMnkT0APV6FL8SZeanDS4FD-41P8jl3q_5VL8v1069EM9kZ7P4m-cy0hJO5sOHG_InrnYhHD-h6FBDep9OHomhoPXbg9eM9KblOsDqD8ndZ4j8YL0ETra3H2wuenAfG5zkxUHm7uftLnpoH-0ufsPbD6_Pa2SyE227pas4E8543WkyhOjRBQrVbMok7KCC-BLZ6AU7ocqXoOBocNYMzy-VDX_Tk9wo9vnQigKptz7Lg6cVRadiZ0ENMA08P3As7cexdT1T559OodJEPsrRmLKRASVszAu6QVpCmhDHVefV5BH2MbYdpzQDnyIv956zhnjjYcxxXGMbcuzaQE7fN3yrHmdrupIU8YqFfCSgfnF_SURIk_htq0Fg0KZS0ajnequ1_ftJdAlby_BdQF3c14ibauVFhdpZb8AnwdUwA>53261</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Duplicate constants with arm neon mull2
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
uncleasm
</td>
</tr>
</table>
<pre>
When the same constant is needed for both the low-half and the high-half multiplications (and possibly for other low/high arithmetic operations as well),
the constant is suboptimally materialized twice, once as an 8-byte vector and once as a 16-byte vector, even though the 16-byte variant alone would be enough.
```
uint8x16_t compare_ok(uint8x16_t x, uint8x16_t y, uint8x16_t X, uint8x16_t Y) {
auto xx = vmull_u8(vget_low_u8(x), vget_low_u8(X));
xx = vmlsl_u8(xx, vget_low_u8(y), vget_low_u8(Y));
auto XX = vmull_u8(vget_high_u8(x), vget_high_u8(X));
XX = vmlsl_u8(XX, vget_high_u8(y), vget_high_u8(Y));
auto top_byte = vuzpq_u8(vreinterpretq_u8_u16(xx), vreinterpretq_u8_u16(XX)).val[1];
return vshrq_n_u8(top_byte, 7);
}
// optimal usage of registers
umull v4.8h, v0.8b, v2.8b
umull2 v0.8h, v0.16b, v2.16b
umlsl v4.8h, v1.8b, v3.8b
umlsl2 v0.8h, v1.16b, v3.16b
uzp2 v0.16b, v4.16b, v0.16b
ushr v0.16b, v0.16b, #7
uint8x16_t compare_fail(uint8x16_t x, uint8x16_t y) {
return compare_ok(x,y, vdupq_n_u8(33), vdupq_n_u8(119));
}
// each constant is materialized twice: once in an 8-byte (.8b) register and once in a 16-byte (.16b) register
movi v2.8b, #33
movi v5.8b, #119
movi v3.16b, #33
movi v4.16b, #119
umull v2.8h, v0.8b, v2.8b
umull2 v0.8h, v0.16b, v3.16b
umlsl v2.8h, v1.8b, v5.8b
umlsl2 v0.8h, v1.16b, v4.16b
uzp2 v0.16b, v2.16b, v0.16b
ushr v0.16b, v0.16b, #7
uint8x16_t compare_fail(uint8x16_t x, uint8x16_t y, uint8x8_t coeffs) {
return compare_ok(x,y, vdupq_lane_u8(coeffs, 0), vdupq_lane_u8(coeffs, 1));
}
// each constant is broadcast twice: once into an 8-byte (.8b) register and once into a 16-byte (.16b) register
dup v3.8b, v2.b[0]
dup v4.16b, v2.b[0]
dup v5.8b, v2.b[1]
dup v2.16b, v2.b[1]
umull v3.8h, v0.8b, v3.8b
umull2 v0.8h, v0.16b, v4.16b
umlsl v3.8h, v1.8b, v5.8b
umlsl2 v0.8h, v1.16b, v2.16b
uzp2 v0.16b, v3.16b, v0.16b
ushr v0.16b, v0.16b, #7
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJzFVt3SmyoUfRpzwzQjoIleeNGvme8VTnqV0UjUUxQrYJI-_dniT9SQ-drTzjTjRMDF2ou9FwyJSO_RPzmrkMoZknHJ0FlUUsWVQoVEFWMpS1EiVI4uokFcXFFcpSgvshyVmqui5sU5VgXMQQ4JAAJQZoAOeTewuClUXjIFOI5EzZoe7tB3h4SOe3Dcz13seVipE1Groow5vyP4ExBilBFLUICCT8ldMaMF7_p2C4FgvkO-oOu4oNU3dBWaAw9DrBI6y7d9-OF_5w6P6eqiUsEN704KpJV13LCT-AZLnI3fuliz_n3VP676X2HByNm_9QEQ_GKtBLrdkEMPqIV88pMOIEabMXWCFPa9W5cnYFqOHs1o6NAZ3cTE5cB0uz3PvFv5vq74ViKPR6vIrsIWlY9hm8yJa5J5PFqm3u2MHwhVoj6ZmpsQ-kf9fZDbMKgEa-qGqW7opPFuSFAfxf79OCxg28bc8d-w4x8WawG0birUyrz5fqr6UKOEjna_kLo_PGZ2P9gj8KDB7EjLOGNIXIA1KySIkUu47rIP79bbBrkR7W6DxDRI13gGE2QwIxjvRnTXWsGhGgtuPHJTCzeAF9x44qYW7h816d4zBd7Uci14yOYKP7UcQvfzwlv26SUu-Ic7dbUThzoutno3y-zpNtX1VF1KR8fMRzEO1758UeynQ3Z1wIGHLafbC0eUoi1MpshQLEgPCLRj_Aemk2sH0VmiXzJ5M9AT1eRS8psutTlpdCl5cqn_Sy71fsql5G-7dOwHZiK7XOT_cS6PK9bbdOT4gtyFi20I_DcMDWrQ4MPJMwkcvG538NqR3qxcH0D9JSl-jSQr0mfoZHP6ZHPbgfna5jYrjjanv2lz20H_bHP6B2y-vD1t0oimIQ3jjSoUZ9FB9xfFh1Ekuhbdda4p4ZYpKmQStNENj3Klargefu7tlQFKJ1uwN3Q4b8fXp7oR_7Iz3PfeCyk1A8u--5Ts8CaPErKn2NvjhFLXT9OE0HQfBtj34iTEZ4I3PE4YlxHU1SGkYldkKKANVd4UEXEJcTEO3J3vueE29PCFkL1HCCNJQlzHc1kJu3fb6diKJts0kZGU6EzCRw7Glo-PsZRFBjdpEw744ZqSiybS1ZmzWJYbEzsy2v8DuV0-qQ">