<!-- Issue metadata: one row per field, with the field name as the row header.
     The presentational attributes on <table> are kept because no stylesheet
     is visible in this fragment to carry the equivalent rules. -->
<table border="1" cellspacing="0" cellpadding="8">
  <tr>
    <th scope="row">Issue</th>
    <td>
      <a href="https://github.com/llvm/llvm-project/issues/128377">128377</a>
    </td>
  </tr>
  <tr>
    <th scope="row">Summary</th>
    <td>
      LLVM fails to optimize average round op down to a single average round instruction
    </td>
  </tr>
  <tr>
    <th scope="row">Labels</th>
    <td>
      new issue
    </td>
  </tr>
  <tr>
    <th scope="row">Assignees</th>
    <td>
    </td>
  </tr>
  <tr>
    <th scope="row">Reporter</th>
    <td>
      johnplatts
    </td>
  </tr>
</table>
<pre>
Here is a snippet of LLVM IR code that LLVM fails to optimize down to an average round instruction on SSE2/AArch64/POWER8 (and other targets with a SIMD average round instruction):
```
<!-- Reproducer 1 of 4: 16 x i8 lanes, logical shifts. Computes lshr(a,1) + lshr(b,1) + ((a or b) and 1) per lane. Angle brackets are entity-escaped so the vector types and constant vectors render as text instead of being parsed as tags. -->define dso_local &lt;16 x i8&gt; @AvgRoundU8x16(&lt;16 x i8&gt; %vec_a, &lt;16 x i8&gt; %vec_b) local_unnamed_addr #0 {
%vec_a_shr_1 = lshr &lt;16 x i8&gt; %vec_a, &lt;i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1&gt;
%vec_b_shr_1 = lshr &lt;16 x i8&gt; %vec_b, &lt;i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1&gt;
%vec_a_or_vec_b = or &lt;16 x i8&gt; %vec_b, %vec_a
%vec_a_or_vec_b_lsb = and &lt;16 x i8&gt; %vec_a_or_vec_b, &lt;i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1&gt;
%sum0 = add &lt;16 x i8&gt; %vec_b_shr_1, %vec_a_shr_1
%sum1 = add &lt;16 x i8&gt; %sum0, %vec_a_or_vec_b_lsb
ret &lt;16 x i8&gt; %sum1
}
<!-- Reproducer 2 of 4: 16 x i8 lanes, arithmetic shifts. Computes ashr(a,1) + ashr(b,1) + ((a or b) and 1) per lane. Angle brackets are entity-escaped so the vector types and constant vectors render as text instead of being parsed as tags. -->define dso_local &lt;16 x i8&gt; @AvgRoundI8x16(&lt;16 x i8&gt; %vec_a, &lt;16 x i8&gt; %vec_b) local_unnamed_addr #0 {
%vec_a_shr_1 = ashr &lt;16 x i8&gt; %vec_a, &lt;i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1&gt;
%vec_b_shr_1 = ashr &lt;16 x i8&gt; %vec_b, &lt;i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1&gt;
%vec_a_or_vec_b = or &lt;16 x i8&gt; %vec_b, %vec_a
%vec_a_or_vec_b_lsb = and &lt;16 x i8&gt; %vec_a_or_vec_b, &lt;i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1&gt;
%sum0 = add &lt;16 x i8&gt; %vec_b_shr_1, %vec_a_shr_1
%sum1 = add &lt;16 x i8&gt; %sum0, %vec_a_or_vec_b_lsb
ret &lt;16 x i8&gt; %sum1
}
<!-- Reproducer 3 of 4: 8 x i16 lanes, logical shifts. Computes lshr(a,1) + lshr(b,1) + ((a or b) and 1) per lane. Angle brackets are entity-escaped so the vector types and constant vectors render as text instead of being parsed as tags. -->define dso_local &lt;8 x i16&gt; @AvgRoundU16x8(&lt;8 x i16&gt; %vec_a, &lt;8 x i16&gt; %vec_b) local_unnamed_addr #0 {
%vec_a_shr_1 = lshr &lt;8 x i16&gt; %vec_a, &lt;i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1&gt;
%vec_b_shr_1 = lshr &lt;8 x i16&gt; %vec_b, &lt;i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1&gt;
%vec_a_or_vec_b = or &lt;8 x i16&gt; %vec_b, %vec_a
%vec_a_or_vec_b_lsb = and &lt;8 x i16&gt; %vec_a_or_vec_b, &lt;i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1&gt;
%sum0 = add &lt;8 x i16&gt; %vec_b_shr_1, %vec_a_shr_1
%sum1 = add &lt;8 x i16&gt; %sum0, %vec_a_or_vec_b_lsb
ret &lt;8 x i16&gt; %sum1
}
<!-- Reproducer 4 of 4: 8 x i16 lanes, arithmetic shifts. Computes ashr(a,1) + ashr(b,1) + ((a or b) and 1) per lane. Angle brackets are entity-escaped so the vector types and constant vectors render as text instead of being parsed as tags. -->define dso_local &lt;8 x i16&gt; @AvgRoundI16x8(&lt;8 x i16&gt; %vec_a, &lt;8 x i16&gt; %vec_b) local_unnamed_addr #0 {
%vec_a_shr_1 = ashr &lt;8 x i16&gt; %vec_a, &lt;i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1&gt;
%vec_b_shr_1 = ashr &lt;8 x i16&gt; %vec_b, &lt;i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1&gt;
%vec_a_or_vec_b = or &lt;8 x i16&gt; %vec_b, %vec_a
%vec_a_or_vec_b_lsb = and &lt;8 x i16&gt; %vec_a_or_vec_b, &lt;i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1&gt;
%sum0 = add &lt;8 x i16&gt; %vec_b_shr_1, %vec_a_shr_1
%sum1 = add &lt;8 x i16&gt; %sum0, %vec_a_or_vec_b_lsb
ret &lt;8 x i16&gt; %sum1
}
<!-- Attribute group #0: the set of function attributes that each of the four function definitions in this listing references via "#0". -->attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
```
Compiler Explorer (https://godbolt.org/z/TKrra9ohh) shows the result of compiling the above LLVM IR snippet for x86_64, AArch64, and POWER8.
</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJzsV12P2jgU_TXm5arIcUgID3mgfGhH22pXM93dR-TEl8SVY0e2A0x__crAMMBAt7PtbqfVSIhYtu-5x9f3yDrcOVlpxJwkb0ky7fHO18bmH02tW8W9d73CiPv8F7QI0gEHp2XbogezhHfv_nwPN7dQGoHga-53M0sulQNvwLReNvITgjBrHSa4Br5CyysEazotQGrnbVd6aTQYDXd3M0bYfDy2ZZ0OCJv__ttfs9sMCMu4FmB8jRY8txV6B2vpa-Bwd_N-eh2VsBGJx4SOSUr3PzoWuJQaQTizUKbkCkg8iVLYgMxIPAMyoONVdRug_sg2UUpYdraBJSssF5ywyXnobqUgbARb6EWnNW9QLLgQFgiLKZDhW0LHcEBZuNouIiDxFJSr7WXEh1wygygMv8s3nh0TL76IePFyiB8KbuxiS21L3XyW-EP1Ty7sIX6h3A4jNOflazvsfTllCNxc19Adc3GF-f52j4uwn3mEiK5DhAQnscc120JY9BfDQgIynIb_5yj15r9T6gWh8h9VqNeJv7AOfVXqj6_ULARE6fmbGqWbbKfU4w2n8nm68rVv6vVcMkr3pf-KwT-_jReP9K0JfE451wg8SzkXyvhEOd-2nucKuHCM5yrgFOKLFfAk7F8q4Ob_VQD_3gq4TuBVAT-TArj3VhadR7fvyXga-hKazvnWmsqic6DN0iKCNhbLzrowcve6BG06vZZawFoqZdF3VkODjbH3hGXaaAzdv0915OUIHX-oEXhhVngwpA8etUCpKyhN00qFApbGwiZLF8FaTuDgMifbm91bzZJrKBCWWyNpVmiBe5jsECzMNq0ydjdZe9-6YC7ZnLB5ZURhlO8bWxE2_0TY_MOv1vKRqet-T-SxGMUj3sM8Gg4oS5OUZr06p0mRRRhhzMssyZIkoxEbjQSOBku6RDboyZxRllDGGItpOsj6ZREtWUKLKKIiLtmIDCg2XKq-UqsmJO9J5zrMI5bFw2FP8QKV23p7xjSuYbtKGAtW3-Yh6E3RVY4MqJLOu0cYL73C_IqfP7Xbpn00-OCkrtT5jiND3uusys9KJ33dFf3SNITNA4H9501rzUcsPWHzLW1H2Hx_rlXO_g4AAP__A4KRwQ">