<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/121691>121691</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Movemask is only generated when N=4
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
LaihoE
</td>
</tr>
</table>
<pre>
When N = 4:
```Rust
/// Reproduction for the N = 4 case: builds a bitmask from the first four
/// elements of `arr`, where bit `i` of the result is set when `arr[i] == 42`.
///
/// Only indices 0..=3 are read even though the array type has 8 elements.
/// Returns the mask as a `u64`; with four compared elements only the low
/// four bits can be set.
// NOTE(review): `#[inline(never)]` is presumably here so that the function
// is emitted on its own and its assembly can be inspected; confirm with the
// reporter.
#[inline(never)]
pub fn f(arr: &[u32; 8]) -> u64{
// The integer type of `mask` is inferred as u64 from the `|=` operands below.
let mut mask = 0;
// Each line turns one comparison into 0 or 1 and shifts it to bit `i`.
// The shift by 0 is kept so that all four lines have the same shape.
mask |= ((arr[0] == 42) as u64) << 0;
mask |= ((arr[1] == 42) as u64) << 1;
mask |= ((arr[2] == 42) as u64) << 2;
mask |= ((arr[3] == 42) as u64) << 3;
mask
}
```
Produces:
```asm
.LCPI0_0:
.long 42
example::f::h52e1c98f18490021:
vpbroadcastd xmm0, dword ptr [rip + .LCPI0_0]
vpcmpeqd xmm0, xmm0, xmmword ptr [rdi]
vmovmskps eax, xmm0
ret
```
But with any other number of elements you get something along the lines of (N=8 here):
```asm
.LCPI0_0:
.long 42
.LCPI0_1:
.quad 2
.quad 4
.quad 8
.quad 16
example::f::h52e1c98f18490021:
xor ecx, ecx
cmp dword ptr [rdi], 42
sete cl
vpbroadcastd xmm0, dword ptr [rip + .LCPI0_0]
vpcmpeqd xmm0, xmm0, xmmword ptr [rdi + 4]
vpmovzxdq ymm0, xmm0
vpand ymm0, ymm0, ymmword ptr [rip + .LCPI0_1]
xor edx, edx
cmp dword ptr [rdi + 20], 42
sete dl
shl edx, 5
xor esi, esi
cmp dword ptr [rdi + 24], 42
sete sil
shl esi, 6
xor eax, eax
cmp dword ptr [rdi + 28], 42
sete al
shl eax, 7
or eax, esi
vextracti128 xmm1, ymm0, 1
vpor xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 238
vpor xmm0, xmm0, xmm1
vmovq rsi, xmm0
or rax, rdx
or rax, rcx
or rax, rsi
vzeroupper
ret
```
Changing types, alignment, or flags does not seem to help. At first I thought it had to do with the shift by 0, but since it works just fine with N=4, something else may be going on.
IR for N=4:
```LLVM
define noundef range(i64 0, 16) i64 @_ZN7example1f17h52e1c98f18490021E(ptr noalias nocapture noundef readonly align 4 dereferenceable(32) %arr) unnamed_addr #0 !dbg !7 {
%0 = load <4 x i32>, ptr %arr, align 4, !dbg !12
%1 = icmp eq &lt;4 x i32&gt; %0, &lt;i32 42, i32 42, i32 42, i32 42&gt;, !dbg !14
%2 = extractelement <4 x i1> %1, i64 0, !dbg !15
%_3 = zext i1 %2 to i64, !dbg !15
%3 = extractelement <4 x i1> %1, i64 1, !dbg !16
%_6 = select i1 %3, i64 2, i64 0, !dbg !16
%4 = or disjoint i64 %_6, %_3, !dbg !17
%5 = extractelement <4 x i1> %1, i64 2, !dbg !18
%_10 = select i1 %5, i64 4, i64 0, !dbg !18
%6 = or disjoint i64 %4, %_10, !dbg !19
%7 = extractelement <4 x i1> %1, i64 3, !dbg !20
%_14 = select i1 %7, i64 8, i64 0, !dbg !20
%8 = or disjoint i64 %6, %_14, !dbg !21
ret i64 %8, !dbg !22
}
```
IR for N=8:
```LLVM
define noundef range(i64 0, 256) i64 @example::f::h52e1c98f18490021(ptr noalias nocapture noundef readonly align 4 dereferenceable(32) %arr) unnamed_addr #0 !dbg !7 {
start:
%_5 = load i32, ptr %arr, align 4, !dbg !12
%_4 = icmp eq i32 %_5, 42, !dbg !14
%_3 = zext i1 %_4 to i64, !dbg !15
%0 = getelementptr inbounds i8, ptr %arr, i64 4, !dbg !16
%1 = load <4 x i32>, ptr %0, align 4, !dbg !16
%2 = icmp eq &lt;4 x i32&gt; %1, &lt;i32 42, i32 42, i32 42, i32 42&gt;, !dbg !17
%3 = select &lt;4 x i1&gt; %2, &lt;4 x i64&gt; &lt;i64 2, i64 4, i64 8, i64 16&gt;, &lt;4 x i64&gt; zeroinitializer, !dbg !18
%4 = getelementptr inbounds i8, ptr %arr, i64 20, !dbg !19
%_25 = load i32, ptr %4, align 4, !dbg !19
%_24 = icmp eq i32 %_25, 42, !dbg !20
%_22 = select i1 %_24, i64 32, i64 0, !dbg !21
%5 = getelementptr inbounds i8, ptr %arr, i64 24, !dbg !22
%6 = load <2 x i32>, ptr %5, align 4, !dbg !22
%7 = icmp eq &lt;2 x i32&gt; %6, &lt;i32 42, i32 42&gt;, !dbg !23
%8 = select &lt;2 x i1&gt; %7, &lt;2 x i64&gt; &lt;i64 64, i64 128&gt;, &lt;2 x i64&gt; zeroinitializer, !dbg !24
%9 = tail call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> %3), !dbg !25
%op.rdx = or disjoint i64 %9, %_22, !dbg !26
%shift = shufflevector &lt;2 x i64&gt; %8, &lt;2 x i64&gt; poison, &lt;2 x i32&gt; &lt;i32 1, i32 poison&gt;, !dbg !25
%10 = or <2 x i64> %8, %shift, !dbg !25
%op.rdx9 = extractelement <2 x i64> %10, i64 0, !dbg !25
%op.rdx10 = or disjoint i64 %op.rdx, %op.rdx9, !dbg !27
%op.rdx11 = or disjoint i64 %op.rdx10, %_3, !dbg !28
ret i64 %op.rdx11, !dbg !29
}
```
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzEWEuP46oS_jVkUzqRwe9FFkl6WjrSnNHRXdwr3U1EDI6ZsU0GcDo9v_4K_IjtxP2YuzitlhwD9X1UUfVRCdVanGrONyjcofBpRRtTSLX5SkUhv6yOkr1u_lPwGr4B8p8gQP4WeVsUee3_vxpt7DvxUbgTdSlqjkhS8wtXiKQofELe9twcIa8hRyShSiF_C4hEKNw1PkH-DhK7iqTwB_K_QBMFKN4hbwsAUHIDVWOgovqHY_eQ38-1Y_HeDiOSdNjhzkPhk13rNkssLtUOlaSA_D3y9x9Bwe-j4PdRyPso5H0U_30U38ZxBGNPJH4anxPytn8ryZqM6-4ER5NUV8jbrr_u__7TO3jtPHR_61LWJ8vrbfmVVueS23l_m7ePIiQcZ2mS4yRIPY_gqfXlfFSSsoxqw-z7tao8RPbAXqRicDYKULhT4gyI7GDYgMuaG0RWnflP1r_3EKPnBIyJzv5SyUulf5x1Z8jpdbC7wStuZnFC3nbXGHgRpgBav4I0BVdQN9WRK5A58JJXvDYaXmUDJ25Ay4qbQtQnoC5YpuBgC0Hb1Ygk35D_lEDBFbcl8RvRhzb-3ZJZiNc_G8qA3I0ABA_GkrsxHP3e0V6lasOaubDax20yq86zM26PhexbV_p1mhvu1pf_dNI4sGCOUsnLryv72b2_jlHGy2jNRtOj5xs7xlOuIZysDSebh9P-zUPq8Ij3VmTZOLK6KMck4Yyfa-G4tfgwd_AWtxaOfGBt4aNHXre1aR8fZU7eYqYPvW5J4tFURz_wTzy_8KtRNDMCk6RLIjw-XzzJgRbqQaJNl-miydkA1S8jfvIJMKtsNidVG9BZOvY-qdYnNcmk-WRbtfPRaRh-cSWb85mr92RzX9D6ZHXQvJ65RuSZluJUW7FE5Dkv6UkDk1xDLQ1oziswEgpentfwLJQ28CeYQjanwoAwUFBm55lslVgXIjdwfAUPjo2xV6Aw8CLVDw3fG20gFzVvV9bIfwqgoq9HPpJmXmoOQsNJ2jdZr9stCwW5VGAlet7afP3677-Qt2XcQdeyqRnPQdH6ZFscEQXQZkFkL2L7igLv8N9vcaelOMfxnYR-QSSxiVxLWgpqI5HRs2nUCJ9TJuvyFVzsIADGFc-54nXG6bG01L5rAhAJbX9AUmjqmlacHShjChDxPUAEs-PJPmLo2ylEQs81UaWkzDYOAVxB2Bbsi3XDlVcHue_J7ccbFiY9EHZAwhYo_znFcjzOzt8Ln7iOZQ9vfGrpRyxBz0IcS1eE3a07kOGOy5XRcBgjmLCHOfgO5xe_GhC4BTbS2ixZ-J8ixjOYaCCOHI7mJc96ar-3Ikv7HswDZy0VMKG_S1GbNsksbmsSHvyZbdzbhp_ygMxgksED7N27EPZmwZILg3205ELQe4DntmlvG3_KhVkkiBNERxDcexD3VsmCB625XZoseTCcAZ6lEWn1XvFhaTJbQB6253d6lPyeHrmtDYL0scbuHxIlbagyfVdpYxneBMqqyWdl6RBMdMkKjEPt2oQFmbnXh0PwjkC0VXHifV7aXYr6aKOlQST3Gx_K5VGh4_dl2Vv2PprK5aIo4_9LlOOpNnblNC9H0nG4wShwo_5-InfBXe3hqKebGNquQ9TCCFqKX1wtSUzwO0dBFlXnQBZzMFg-g5H54xQkD3PwJjMHQu5l6kCGYPlL10WnNoPkfzYQc_EiU-nuk5I8SspwMSA3mHielGSSlNFSUs4TkPhTRb4lIBknYNzhkbsEjIZYYpLcEo58MOHIIBep4zdUlJDRsuyFtiwv1frCMyPVWnHWZHwt1foSOBVJZjXh-oB0xjCoizyvFbsuXTxpf_GQeTYNStD2yi5MRZPnJW_3NQ9MfzNNRs9SaFmPx7vDag8J92fUrbs7p8GLrnVYpu32-XYU0oUuYArYdhGPymMGd9vUPKzt
fLexjnuGFc-w8NtYfWtz16aRZN4h9ICzdekbjQLytiu28Vnqp3TFNzj2Iy_xgthbFZssjeOj50f4eAyjMIrTY5BHNE-xn8VhlqYrsSEeCT3shZj4ge-t_TRhLIxCzHOW2coOPF5RUa5dWkt1WgmtG77BBEcpXpX0yEvtfjEmpOYv4GYRISh8WqmNNfrj2Jy0rQuhjb7BGGFKvvlLXrj7uVNocH3FiddcUcMZvLjfmu3XsVWjyk1hzNn9YkmeEXk-CVM0x3UmK0SeLWb3-OOs5Hee2a-Zbif2u2e31cuG_C8AAP__Y90CvQ">