<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/88690>88690</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[aarch64] `vbslq_u8` / `BSL` is being folded into `AND`+`OR` and not being optimized back
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
Validark
</td>
</tr>
</table>
<pre>
This movemask routine ([Godbolt link](https://godbolt.org/z/feq485549)):
```c
#include <arm_neon.h>
// Packs selected bits from four 16-byte vectors into a single 64-bit value.
//
// For each pair of adjacent byte lanes (2j, 2j+1), byte j of the result holds:
//   bit k     (k = 0..3) = bit k     of byte 2j   of p<k>
//   bit 4 + k (k = 0..3) = bit 4 + k of byte 2j+1 of p<k>
// NOTE(review): presumably each input byte is a comparison result (0x00 or
// 0xFF), which would make this a "movemask" over 64 bytes -- confirm with caller.
uint64_t vmovmaskq_u8_(const uint8x16_t p0, const uint8x16_t p1, const uint8x16_t p2, const uint8x16_t p3) {
// Per-input bit masks: even byte lanes contribute to the low nibble and odd
// byte lanes to the high nibble; each input p<k> owns one bit in each nibble.
const uint8x16_t bitmask1 = { 0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10,
0x01, 0x10, 0x01, 0x10, 0x01, 0x10, 0x01, 0x10};
const uint8x16_t bitmask2 = { 0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20,
0x02, 0x20, 0x02, 0x20, 0x02, 0x20, 0x02, 0x20};
const uint8x16_t bitmask3 = { 0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40,
0x04, 0x40, 0x04, 0x40, 0x04, 0x40, 0x04, 0x40};
const uint8x16_t bitmask4 = { 0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80,
0x08, 0x80, 0x08, 0x80, 0x08, 0x80, 0x08, 0x80};
// Keep only p0's assigned bit in each lane; every other bit starts out zero.
uint8x16_t t0 = vandq_u8(p0, bitmask1);
// Bitwise select: take bits from the second argument where the mask bit is
// set, otherwise keep the accumulated bits from the third argument. These
// three selects are the operations the issue expects to lower to BSL.
uint8x16_t t1 = vbslq_u8(bitmask2, p1, t0);
uint8x16_t t2 = vbslq_u8(bitmask3, p2, t1);
uint8x16_t tmp = vbslq_u8(bitmask4, p3, t2);
// Pairwise add of adjacent bytes merges each even (low-nibble) lane with its
// odd (high-nibble) neighbour; the bit sets are disjoint, so no carry occurs.
// Passing tmp twice puts the sums of all eight pairs into the low 64 bits.
uint8x16_t sum = vpaddq_u8(tmp, tmp);
// Return the low 64-bit lane, which holds the packed result.
return vgetq_lane_u64(vreinterpretq_u64_u8(sum), 0);
}
```
results in the following generated assembly (constant-pool data omitted):
```asm
vmovmaskq_u8_: // @vmovmaskq_u8_
adrp x8, .LCPI0_0
ldr q4, [x8, :lo12:.LCPI0_0]
adrp x8, .LCPI0_1
and v0.16b, v0.16b, v4.16b
ldr q4, [x8, :lo12:.LCPI0_1]
adrp x8, .LCPI0_2
and v1.16b, v1.16b, v4.16b
orr v0.16b, v1.16b, v0.16b
ldr q1, [x8, :lo12:.LCPI0_2]
and v1.16b, v2.16b, v1.16b
movi v2.8h, #8
fneg v2.8h, v2.8h
and v2.16b, v3.16b, v2.16b
orr v1.16b, v1.16b, v2.16b
orr v0.16b, v0.16b, v1.16b
addp v0.16b, v0.16b, v0.16b
fmov x0, d0
ret
```
Using inline assembly, I managed to get the following output instead:
```asm
foo:
adrp x8, .LCPI0_0
ldr q4, [x8, :lo12:.LCPI0_0]
adrp x8, .LCPI0_1
and v0.16b, v0.16b, v4.16b
ldr q4, [x8, :lo12:.LCPI0_1]
adrp x8, .LCPI0_2
bsl v4.16b, v1.16b, v0.16b
ldr q1, [x8, :lo12:.LCPI0_2]
adrp x8, .LCPI0_3
bsl v1.16b, v2.16b, v0.16b
ldr q1, [x8, :lo12:.LCPI0_3]
bsl v1.16b, v3.16b, v0.16b
addp v0.16b, v0.16b, v0.16b
fmov x0, d0
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0V12T4igX_jV4Q41FDiQdL7xo29e3pmpqd2v249Yigso2gXQg2Z759VsQP6IdHe3esSwJcJ5zngM8x8CdUxsj5RSlM5TOR7zxW1tP_-JaCV4_jworvk3_2CqHS9vKkrtnXNvGKyMxghyls_9bUVjtsVbmGaVzBPnW-8oh-ohggWCx6ebHtt4gWHxHsFjLF5anKZsgiF_6iMgckf1vRrrvatcHqsxKN0JiRJ94XS6NtGa8RfR_fVijjM_Y0uO2tG2g-bJs8iWCfGWN8zhM569JtvS4Igie8NvhZHgYhocpgglGD7MuOn5rUSgfaCQY0XkwxOSVxBDkNSFde1d_F-hDXh7miP6QMfQZQ4eFva97-vtA1z8fCnJTQrSfEOuwbO_rnv7NCb0_yE0JsX5CeYfN977u6d-c0PuD9BLaR-sl5ElMpeVGBL0iyDt17sUTy8NsCNjpqi2c3gH3pzfAOy17cgLvo-ECmkZ09OGvBC-rC_i4kVX04uEi3jVlh6-42OftyyqiQtOD1dI3tcHtRvqXpeZGLpuMIcjbWirjZV3VYaLJWOfFNWUsqk_4JPewC6fFtb8jX6VrtHdYGexDoZel8qG4y9dQdpXZYM8LLbHgnl-p19yV3chpBaZXzlf3D4ERI6eYk2PJRV2F9jUeqvGXp98-kyU5tdGiju1L3ACUzjpjRB-1TQDRxwMsnd_gPTmzMSK2LRknWRHsek8sPr2LTTLM5oQKXKCSHAgk16jYuu6TTc4SGKadXKcNR9oDdOAs2EmI0rYq2sM430bnQPNTk7WRm-N89zC8BMdI9Cz62yUYWq4Lthd3up8PF6K6aDiwtuvStvGYxfImyFHeV4T5pwviU0aHty3unCwL_S3gP-OSG76RAnuLN9IfdftDca6tPdjcq7Cfp66fqKxBCmeqKpzutp3drpQ7VHJtMehbJoNi-oBe6ZHJIdFkQDsDIf6jY777XD_tIzGlYkInfCSnyUNCkyxjGR1tpxOQa8EzAoVknPMHmgiRAFA6oTSn-WSkpkCAEZak5IEyoOMiE1DwnNICiiwVDDEiS670WOu2DDeRkXKukdM8zyZkpHkhtYsXIAAj_8FxEkHYwVE9DZhPRbNxiBGtnHdHL155HW9OnNerbcZQOscoI4f3gozg-P-WkdnvX0JPOVzIIOq11UIKrIy3Yfrxl3lYDZihjPz6NVgGVRjrd-a28qpU36XABV89j5paT89uWcpvm2K8siWCReC3az5Vtf1brjyCRczKIVjErP8NAAD__43NlHM">