<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/148238>148238</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Bit-reversal codegen broken on x86 systems with `gfni` but no `avx`
</td>
</tr>
<tr>
<th>Labels</th>
<td>
new issue
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
TDecking
</td>
</tr>
</table>
<pre>
<details>
<summary>LLVM</summary>
```llvm
; Baseline case: bit-reverse the i64 argument %s with the generic
; llvm.bitreverse.i64 intrinsic and return the result.
; Attribute group #0 sets "target-cpu"="x86-64" and carries no
; "target-features" string.
define noundef i64 @rev_u64_0(i64 noundef %s) unnamed_addr #0 {
start:
%0 = tail call i64 @llvm.bitreverse.i64(i64 %s)
ret i64 %0
}
; Same llvm.bitreverse.i64 call as @rev_u64_0, but compiled under attribute
; group #1, whose "target-features" are "+gfni,+sse,+sse2" (GFNI without AVX).
; This is the configuration named in the issue summary.
define noundef i64 @rev_u64_1(i64 noundef %s) unnamed_addr #1 {
start:
%0 = tail call noundef i64 @llvm.bitreverse.i64(i64 %s)
ret i64 %0
}
; Same llvm.bitreverse.i64 call again, compiled under attribute group #2,
; which enables GFNI together with AVX/AVX2 and the SSE3..SSE4.2 features.
; Serves as the comparison point for @rev_u64_1.
define noundef i64 @rev_u64_2(i64 noundef %s) unnamed_addr #2 {
start:
%0 = tail call noundef i64 @llvm.bitreverse.i64(i64 %s)
ret i64 %0
}
; Hand-written variant using the GFNI intrinsic directly, compiled under the
; same attribute group (#1: "+gfni,+sse,+sse2") as @rev_u64_1.
; Steps: place %s in lane 0 of a <2 x i64> (lane 1 = 0), reinterpret as
; <16 x i8>, apply llvm.x86.vgf2p8affineqb.128 with a constant matrix and
; immediate 0, take lane 0 back out as i64, then byte-swap it.
define noundef i64 @rev_u64_manual(i64 noundef %s) unnamed_addr #1 {
start:
; Build the vector <%s, 0>.
%.sroa.0.8.vec.insert.i = insertelement <2 x i64> <i64 poison, i64 0>, i64 %s, i64 0
%0 = bitcast <2 x i64> %.sroa.0.8.vec.insert.i to <16 x i8>
; Matrix operand is the byte pattern 1,2,4,...,64,-128 repeated twice.
; NOTE(review): under the Intel SDM bit-matrix convention this pattern
; presumably reverses the bits inside each byte - confirm against the SDM.
%r = tail call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %0, <16 x i8> <i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 -128>, i8 noundef zeroext 0) #5
%1 = bitcast <16 x i8> %r to <2 x i64>
%.sroa.010.0.vec.extract = extractelement <2 x i64> %1, i64 0
; Reverse the byte order of the extracted 64-bit lane.
%2 = tail call noundef i64 @llvm.bswap.i64(i64 %.sroa.010.0.vec.extract)
ret i64 %2
}
; Intrinsic declarations used by the functions in this listing.
declare i64 @llvm.bswap.i64(i64) #3
declare i64 @llvm.bitreverse.i64(i64) #3
; The third operand of the GFNI intrinsic is an immediate (immarg).
declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8 immarg) unnamed_addr #4
; #0: target-cpu x86-64, no explicit target-features (used by @rev_u64_0).
attributes #0 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
; #1: adds +gfni,+sse,+sse2 and no AVX (used by @rev_u64_1 and @rev_u64_manual).
attributes #1 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" "target-features"="+gfni,+sse,+sse2" }
; #2: +gfni plus +avx,+avx2 and further SSE-family features (used by @rev_u64_2).
attributes #2 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" "target-features"="+gfni,+sse,+sse2,+avx,+avx2,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+crc32,+ssse3" }
; #3: attributes of the bswap / bitreverse intrinsic declarations.
attributes #3 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; #4: attributes of the vgf2p8affineqb intrinsic declaration.
attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
; #5: call-site attributes on the vgf2p8affineqb call in @rev_u64_manual.
attributes #5 = { nounwind }
```
</details>
<details>
<summary>Generated assembly</summary>
```asm
# Compiler output for @rev_u64_0 (no GFNI): scalar-only bit reversal.
# Input is read from rdi; the result is in rax at ret.
# Shape: byte swap, then three mask-and-shift rounds that exchange nibbles,
# 2-bit pairs and single bits.
rev_u64_0: # @rev_u64_0
# Reverse the byte order first.
bswap rdi
# Round 1: swap the two nibbles of every byte.
# 1085102592571150095 == 0x0F0F0F0F0F0F0F0F.
mov rax, rdi
shr rax, 4
movabs rcx, 1085102592571150095
and rax, rcx
and rdi, rcx
shl rdi, 4
or rdi, rax
# Round 2: swap adjacent 2-bit groups.
# 3689348814741910323 == 0x3333333333333333.
movabs rax, 3689348814741910323
mov rcx, rdi
and rcx, rax
shr rdi, 2
and rdi, rax
# rax = ((x >> 2) & mask) + 4 * (x & mask); lea does the shift-left and merge.
lea rax, [rdi + 4*rcx]
# Round 3: swap adjacent bits.
# 6148914691236517205 == 0x5555555555555555.
movabs rcx, 6148914691236517205
mov rdx, rax
and rdx, rcx
# shr with no explicit count: shift right by one.
shr rax
and rax, rcx
# rax = ((x >> 1) & mask) + 2 * (x & mask).
lea rax, [rax + 2*rdx]
ret
# Constant pool referenced by rev_u64_1. Every entry is 16 bytes.
# .LCPI1_0/2/3/5/6 are the memory operands of the five gf2p8affineqb
# instructions in rev_u64_1; .LCPI1_1/4/7 are the pand masks.
# NOTE(review): the shift interpretations below assume the Intel SDM
# bit-matrix convention for GF2P8AFFINEQB - confirm against the SDM.
#
# .LCPI1_0: matrix for the first gf2p8affineqb; looks like a per-byte
# logical shift right by 4.
.LCPI1_0:
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
# .LCPI1_1: sixteen bytes of 15 (0x0F), the low-nibble mask.
.LCPI1_1:
.zero 16,15
# .LCPI1_2: matrix for the second gf2p8affineqb; looks like a per-byte
# shift left by 4.
.LCPI1_2:
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
.byte 0 # 0x0
# .LCPI1_3: matrix for the third gf2p8affineqb; looks like a per-byte
# logical shift right by 2.
.LCPI1_3:
.byte 0 # 0x0
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 0 # 0x0
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
# .LCPI1_4: sixteen bytes of 51 (0x33), the 2-bit-pair mask.
.LCPI1_4:
.zero 16,51
# .LCPI1_5: matrix for the fourth gf2p8affineqb; looks like a per-byte
# shift left by 2.
.LCPI1_5:
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
.byte 0 # 0x0
# .LCPI1_6: matrix for the fifth gf2p8affineqb; looks like a per-byte
# logical shift right by 1.
.LCPI1_6:
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
.byte 0 # 0x0
.byte 128 # 0x80
.byte 64 # 0x40
.byte 32 # 0x20
.byte 16 # 0x10
.byte 8 # 0x8
.byte 4 # 0x4
.byte 2 # 0x2
# .LCPI1_7: sixteen bytes of 85 (0x55), the alternating-bit mask.
.LCPI1_7:
.zero 16,85
# Compiler output for @rev_u64_1 (+gfni,+sse,+sse2, no AVX).
# This is the listing the report is about: the bit reversal is still built
# from three mask/shift/or rounds, with five gf2p8affineqb instructions
# standing in for the per-byte shifts. rev_u64_2 and rev_u64_manual in the
# same listing each need only one.
# Input is read from rdi; the result is in rax at ret.
rev_u64_1: # @rev_u64_1
movq xmm0, rdi
# Round 1 (mask 0x0F): exchange the nibbles of each byte.
movdqa xmm1, xmm0
gf2p8affineqb xmm1, xmmword ptr [rip + .LCPI1_0], 0
movdqa xmm2, xmmword ptr [rip + .LCPI1_1] # xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
pand xmm1, xmm2
pand xmm0, xmm2
gf2p8affineqb xmm0, xmmword ptr [rip + .LCPI1_2], 0
por xmm0, xmm1
# Round 2 (mask 0x33): exchange adjacent 2-bit groups.
movdqa xmm1, xmm0
gf2p8affineqb xmm1, xmmword ptr [rip + .LCPI1_3], 0
movdqa xmm2, xmmword ptr [rip + .LCPI1_4] # xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
pand xmm1, xmm2
pand xmm0, xmm2
gf2p8affineqb xmm0, xmmword ptr [rip + .LCPI1_5], 0
por xmm0, xmm1
# Round 3 (mask 0x55): exchange adjacent bits.
movdqa xmm1, xmm0
gf2p8affineqb xmm1, xmmword ptr [rip + .LCPI1_6], 0
movdqa xmm2, xmmword ptr [rip + .LCPI1_7] # xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
pand xmm1, xmm2
pand xmm0, xmm2
# Doubling each byte is the left shift by one; the 0x55 mask has already
# cleared every byte's top bit, so nothing is carried out of a byte.
paddb xmm0, xmm0
por xmm0, xmm1
# Move the low 64 bits back to a GPR and reverse the byte order.
movq rax, xmm0
bswap rax
ret
# .LCPI2_0: 16-byte matrix operand for the vgf2p8affineqb in rev_u64_2
# (bytes 1,2,4,...,128 repeated twice).
# NOTE(review): under the Intel SDM bit-matrix convention this pattern
# presumably reverses the bits within each byte - confirm.
.LCPI2_0:
.byte 1 # 0x1
.byte 2 # 0x2
.byte 4 # 0x4
.byte 8 # 0x8
.byte 16 # 0x10
.byte 32 # 0x20
.byte 64 # 0x40
.byte 128 # 0x80
.byte 1 # 0x1
.byte 2 # 0x2
.byte 4 # 0x4
.byte 8 # 0x8
.byte 16 # 0x10
.byte 32 # 0x20
.byte 64 # 0x40
.byte 128 # 0x80
# Compiler output for @rev_u64_2 (GFNI with AVX enabled): a single
# VEX-encoded vgf2p8affineqb followed by bswap.
# Input is read from rdi; the result is in rax at ret.
rev_u64_2: # @rev_u64_2
vmovq xmm0, rdi
vgf2p8affineqb xmm0, xmm0, xmmword ptr [rip + .LCPI2_0], 0
vmovq rax, xmm0
# Reverse the byte order of the 64-bit result.
bswap rax
ret
# .LCPI3_0: 16-byte matrix operand for the gf2p8affineqb in rev_u64_manual.
# It is the matrix constant from the IR call (1,2,4,...,64,-128 repeated),
# printed here as unsigned bytes.
.LCPI3_0:
.byte 1 # 0x1
.byte 2 # 0x2
.byte 4 # 0x4
.byte 8 # 0x8
.byte 16 # 0x10
.byte 32 # 0x20
.byte 64 # 0x40
.byte 128 # 0x80
.byte 1 # 0x1
.byte 2 # 0x2
.byte 4 # 0x4
.byte 8 # 0x8
.byte 16 # 0x10
.byte 32 # 0x20
.byte 64 # 0x40
.byte 128 # 0x80
# Compiler output for @rev_u64_manual (+gfni,+sse,+sse2, no AVX): the
# hand-written intrinsic call becomes one non-VEX gf2p8affineqb plus bswap.
# Input is read from rdi; the result is in rax at ret.
rev_u64_manual: # @rev_u64_manual
movq xmm0, rdi
gf2p8affineqb xmm0, xmmword ptr [rip + .LCPI3_0], 0
movq rax, xmm0
# Reverse the byte order of the 64-bit result.
bswap rax
ret
```
</details>
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzsWluPq74R_zTOi7XIHgMhD3nYTc5WlU6lPlR9PTLgZOnhkrVNNvv_9JUNIcAScm9P-z9olbDM2DPzm4vHDlypZJ0LMUfeC_KWE17qt0LO_7EU0c8kX0_CIv6cI7aIheZJqhD7hsgzYgtVZhmXn4h9-_79n39DbIHg9fDM8FR_Pqn-0nSbIfIci1WSC5wXZR6LFU58FyOXSLH9UfruD4IgMI_2ZASeQjDDZZ7zTMQ_eBxLjIARjKYviDwrzaVGzAjChplgxJbYKIojnqb76Y1sJ0y0FFshlXAS360F1QLseCk0rp8Ro_l0WZkwrjI9U2V6pso9MY9UHc5UHX491TOelzy9AXoEnqNkwR3iBM5WRE6SKyG1k1ibqn9EKjKRa4zYAvDOqIDYN_OfEbopElXkCBZWNWJivr6v1Ng_72AVJjri6suMR3XRhWGlvuENqrQy3LIHfIengX4X-M52vYJNwFcGy_fQoRAgCHrsBnhY9CdhiyTA1NoRYKi_3fo7qL-pX9-wPYe_Z3mysiquG2epoQ0aR_8hZCF2GhPjbQTM26NM-yh37ZQ1oAfs9wNr_ClxiPWA2GnJI22nq--PRAN49Iuz4ZzEUB98082JI0oMZAp0MyVKuRRjs9c4sWPcA0k6MOSWMPsSYbVLE7NirIey1q2s41rLJCy1UHXhZ0uTzjgrld7IYi2FUjgvVlKYYiFFVEpl7tRnHlngP5I8xnmRp_yPz9DcfyRpKoUuZY4zkRXyE0GQF7mwSnxoHqYCI4CNLELxpDSPfiIAxJYIIMnTJBdPXGUIwDBpLtdCP0WbsuHZBf6Tgc9UzeUXA-j_lAENfSW4LqVQDROCl_UqTxAsELwoJZobOGY5_AksN3d8u2tu4CiXUoI1d65DW_c1RyQj1jBb7mFc2TFcTekJefTzAHEXWLURUZnyCrUxZIekutdKvVSQ1whqpqgr376zrPpRBK_dFtU-Pd61_kXkQnItYsyVElmYfp7uYU3wkOdDs8qe8eiFgHW7W9IMsAUaYyzjpPU0K7b2W3ITQz2iepN7gtsdw0OFsYwsiZLAowS8GXhTSj1CZl6LmedxR0C0M8TmaZy0njZi0zaxLbqQuDOO23GNQpUM5gcz5gYBdacunVHCgA1ZHA1YbPTaE_iuB0VLMAxZ2NGpvlLB92oh70XGCUbwYvqRZyPGWx6H1aduMKOuP6PAfI9OgXg9K2Q8oOhBm90QsrJxxikXtQxoEY0RfGeNAGNE3DNCCo3Is_N98fe_UhuvhuaEn1pgjAkev0z0kl07aA8jx2jXz3rdSArBGeOCIZG2mzJUd4jK4ByFgLRBpf45Y-jlqN4ft-s9ZRAfR_WMid0Obpdg3dfGb6NaBzvd71NrXrNlsLwIFtQ78EEvKcZDqQmmATWaUBqgnTBub92QcWeNpP-dcLo954NRQM-Z9d5wXwvo2Qjct_zWYcx64f4YlX-JStsbenXBfUym_0cD4Y51uDf0aofcsP7dpxbUCeGO1H-PHvi84cQxu9UHxNz168stxfAuq8tju8b_nwJwwdrz6DX-nFlba4h_bA05d57f60RX5J3D5Pc6cdee8XJP1HkyHVpa7MISeK3jIXrp8RDtHii8412Wka-HIlmxjd85NlT7y4PlOpA7J_G4w_ZRyBhvtLTHBsnGHhs0ZwPe0nCRYUFwegaKvKW1yLBXZ3beC_Wq7dZ9PjpHGxuex23joE-rbCcD5AGIyGkDYQCiTX0C1pqBPtpVrK3HZT5yh3xkf7-628dXH3UNe7ibvPu56Tr_-MdT6TwvTYe8FHhVfbnPxx29tOFx3PNOH_rjsL_jw1lmb2BzQF4djrZOMqE5yayvG7rZ608Kxhb361uGq5fFGzqj8Zbrkl7u9oZ2bLNy_RboWn_c0KSMbyCvOSG9wBGHF3zYc7_LaCO7rVNwuNHo
_qjfTfATRQwGG4q9uF7C9zK9vloJz44n_FgknQNxp8G-JcAekNI3dNc3dPQn9gq_a-xJQx5aY3svwI1sMXp5Xw84a4txTefFuil_0eJeX1XKj_2uP4nnLJ6xGZ-IOZ16AGxGfW_yNuczP2Bu5AbTFVtNQ99nU1hRIlzhrWZT4k-SORDwyJRSyohPmOPOZsKdAnHdOCYQAnKJyHiSOvalpkKuJ4lSpZhTNwAWTFIeilTZl2QBcvGBLRWB2RVM5NwMegrLtUIuSROl1WEanehUzF8S_VS9Y8VTHBVrkeNQFj9Fjosc7wIfq0-lRabwR6LfMPKJfcfEJzgsNc4L84Rvd8gnk1Km8zetN8rURHhF8LpO9FsZOlGRIXi179lWX08bWfxLRBrBq9VWIXitzdnO4d8BAAD__wgSohg">