<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/66513>66513</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
[InstCombine][X86] Failure to replace @llvm.x86.sse41.pblendvb with select
</td>
</tr>
<tr>
<th>Labels</th>
<td>
backend:X86,
new issue,
llvm:instcombine,
missed-optimization
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
RKSimon
</td>
</tr>
</table>
<pre>
https://godbolt.org/z/n8qYT9hvc
In many cases we can replace SSE pblendvb intrinsics with select nodes, by determining that the condition element is a sign-extended compare result (or a logical combination of such results).
But in some circumstances the logic fails to simplify and we end up stuck with the pblendvb intrinsics, which prevents further generic folds from occurring.
```c
#include &lt;x86intrin.h&gt;
auto tricky(__m128i a, __m128i b, __m128i c, __m128i src) {
// Valid (> 0) weights
__m128i aValid = _mm_cmpgt_epi32( a, _mm_setzero_si128() );
__m128i bValid = _mm_cmpgt_epi32( b, _mm_setzero_si128() );
__m128i cValid = _mm_cmpgt_epi32( c, _mm_setzero_si128() );
__m128i bothValid = _mm_and_si128( aValid, bValid );
__m128i allValid = _mm_xor_si128( bothValid, cValid );
// Force a / b
__m128i forceA = _mm_and_si128( allValid, aValid );
__m128i forceB = _mm_and_si128( allValid, bValid );
// Determine output
__m128i out = _mm_and_si128( src, bothValid );
out = _mm_blendv_epi8( out, a, forceA );
out = _mm_blendv_epi8( out, b, forceB );
return out;
}
```
```ll
define <2 x i64> @tricky(<2 x i64> noundef %a, <2 x i64> noundef %b, <2 x i64> noundef %c, <2 x i64> noundef %src) {
entry:
%0 = bitcast <2 x i64> %a to <4 x i32>
%cmp.i = icmp sgt <4 x i32> %0, zeroinitializer
%sext.i = sext <4 x i1> %cmp.i to <4 x i32>
%1 = bitcast <4 x i32> %sext.i to <2 x i64>
%2 = bitcast <2 x i64> %b to <4 x i32>
%cmp.i21 = icmp sgt <4 x i32> %2, zeroinitializer
%sext.i22 = sext <4 x i1> %cmp.i21 to <4 x i32>
%3 = bitcast <4 x i32> %sext.i22 to <2 x i64>
%4 = bitcast <2 x i64> %c to <4 x i32>
%cmp.i23 = icmp sgt <4 x i32> %4, zeroinitializer
%sext.i24 = sext <4 x i1> %cmp.i23 to <4 x i32>
%5 = bitcast <4 x i32> %sext.i24 to <2 x i64>
%and.i = and <2 x i64> %3, %1
%xor.i = xor <2 x i64> %and.i, %5
%and.i25 = and <2 x i64> %xor.i, %1
%and.i26 = and <2 x i64> %xor.i, %3
%and.i27 = and <2 x i64> %and.i, %src
%6 = bitcast <2 x i64> %and.i27 to <16 x i8>
%7 = bitcast <2 x i64> %a to <16 x i8>
%8 = bitcast <2 x i64> %and.i25 to <16 x i8>
%9 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %6, <16 x i8> %7, <16 x i8> %8)
%10 = bitcast <2 x i64> %b to <16 x i8>
%11 = bitcast <2 x i64> %and.i26 to <16 x i8>
%12 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %9, <16 x i8> %10, <16 x i8> %11)
%13 = bitcast <16 x i8> %12 to <2 x i64>
ret <2 x i64> %13
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
```
I don't know if it's the endless bitcasts to/from <2 x i64> due to the __m128i type, or if something else is going on.
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJysV01v4zgS_TX0pRBDIiVZOviQxG2gsbftxWL3FFBU2eKEIjUklTj59QNKsuMPxfY0Bggc8-O9qvckllncObnViEuSPpF0NeOdr41d_vtfv2Rj9Kw01cey9r51hD0SuiZ0vTVVaZSfG7sldP1J6Frnf_7_P0X9Jki0ItHj8PlTQ8P1Bwju0ME7guAaLLaKC4Rfv35AWyrU1VsJUnsrtZPCwbv0NThUKDxoU6Ej9BnKD6jQo22klnoLvuYefI0gjK6kl0YDKmxQe5AOOAQ9D7jzqCusQJim5RbBouuUB0JzY0GZrRRhqZSa9wxmEygbQov5sYinzoPU4EyDIKQVXeM81wL7-APLhkvlwBtwsmmV3HwA11XQi7qCrgXnO_E6CAugCdVB43stRQ2txTfU3sGms75GC1vUaEMQoyoHG2saMEJ01kq9ncOYaRYNf3v7KZNaqK5CIOx5l2dDpHlN2I9hB--8AW-leP0gNH95aWKaS-Ahj_2gPB6I44GzgtACyOJpb1QxvBfwX65kFRwm7AdEYdM7ym3t3WHjIdS4la3gpWleRNNu_Qu2klFC8zGRpnlx6D_RmhcnY5oH3hCXFoQ9XTCWVxnL32AUVxnF7-RofH1KynV1AI6m9C_83slJGq7UKcvO2C-WQ5BAJKaITh_a2liBwCF8Ly9ibcLq4zf5jnmEQPxqxj3L022WSd2n6a7GQoBgOt92_iKW6fx0oP69fT5-CKfJHgOHMxqedg81ne9Fho-9I38XXB7ATyMYAOBAYNF3VvebD7SL1dkBPxsqNUxUuAmGEPZMYQcyS8L5I0l0OOGnK9p0usINEJr2ir5dLa-uiqur51UCtbcf4SekH0HYEvWOldIL7vx59jTloaYS9pyEWUYPxavHiqadyx4vRdOC2_rTrT1_SDCcTamll1zJT7RHFA53fuQIXw_4eIQPIa7kEJ_nfxp-5B8IvqQdEdDrBpQ3DaDxDQvoPRZQesMEGl_LhN1jA6XXjEiuGyFuG8FuGJHcZURyywh2LZP0LiOSa0ZwXY2vZLhDXBjB-iNH0_gIsjN2hOyMnThFgXGEpeeRaPp9rJ53It4AzO4Dsgvg4nvgcaqhfHxBsxuVYmQejI2zsJifGru4r9ZMYvN7oqfXGIqewXOpQHClTveFSq3UWzPf5dncOUzi-f6OOJTu4600zcayezq7mJwNl5HjgnWj4pbfaQjYi2o35UJ2zYWY_oM2FJOC42h6Oj4z4qJmne3_vlpZnBAfs7Nf7AqFCk3H7yq8UDExUUxeDn5CZTShCw-v2ryD3ID0rm88UFcKndurDi0Loeu-pTgVVHUY5AfM_lLlP1oMKRgbGEM35OvQiqFyGJqurQkjo-ezasmqghV8hss4K5JFniZxNKuXySIviySKELNNWiacZnFO02hRJGWZYyxmckkjyqIiTmOWMBbNWSpYVEQCM1wIwSKSRNhwqea9h8ZuZ9K5DpdZlsZspniJyvVNLKUlF6-oK8Ie_5dnhIZfQUKpxnfoIYeZwETYo9TOD73g11IjncPqwbReNvKz7xHDWrqa2WWAPZTd1oUHKp13Xyl56VXfSv_Uzj-PnOmKpE8hk3QFay5VZ3uD973w96_FcTs866w6b8Slr7tyLkxD6LrXMvx7aK35A4UndN3rdYSue5f-CgAA__9Xv3SQ">