<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href=https://github.com/llvm/llvm-project/issues/105807>105807</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
Clang's "__builtin_reduce_min" algorithm for float vectors needs improvement when setting "-msse4.1".
</td>
</tr>
<tr>
<th>Labels</th>
<td>
clang
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
WiwilZ
</td>
</tr>
</table>
<pre>
&lt;https://godbolt.org/z/vW7j9Tene&gt;
Taking the f32x4 vector as an example,
"__builtin_reduce_min" uses the "min" and "cmpunord" instructions 3 times each:
```cpp
using f32x4 [[gnu::vector_size(16)]] = float;
float ReduceMin(f32x4 v) {
return __builtin_reduce_min(v);
}
```
```asm
ReduceMin(float vector[4]):
movaps xmm1, xmm0
movaps xmm2, xmm0
movshdup xmm3, xmm0
cmpunordss xmm0, xmm0
movaps xmm4, xmm0
andps xmm4, xmm3
minss xmm3, xmm1
shufps xmm1, xmm1, 255
movhlps xmm2, xmm2
andnps xmm0, xmm3
orps xmm0, xmm4
movaps xmm3, xmm2
minss xmm3, xmm0
cmpunordss xmm0, xmm0
movaps xmm4, xmm0
andnps xmm4, xmm3
andps xmm0, xmm2
orps xmm0, xmm4
movaps xmm2, xmm1
minss xmm2, xmm0
cmpunordss xmm0, xmm0
movaps xmm3, xmm0
andnps xmm3, xmm2
andps xmm0, xmm1
orps xmm0, xmm3
ret
```
but this can be reduced to 2 times each:
```cpp
float MyReduceMin(__m128 a) {
const __m128 v1 = _mm_movehdup_ps(a); // {a[3], a[3], a[1], a[1]}
// {, a[2] == NaN ? a[3] : min(a[2], a[3]), , a[0] == NaN ? a[1] : min(a[0], a[1])}
const __m128 min64 = _mm_blendv_ps(_mm_min_ps(v1, a), v1, _mm_cmpunord_ps(a, a));
const __m128 v2 = _mm_shuffle_ps(min64, min64, 2); // {, , , min64[2]}
// {, , , min64[0] == NaN ? min64[2] : min(min64[0], min64[2])}
const __m128 min32 = _mm_blendv_ps(_mm_min_ss(v2, min64), v2, _mm_cmpunord_ss(min64, min64));
return _mm_cvtss_f32(min32);
}
```
```asm
MyReduceMin(float vector[4]):
movshdup xmm2, xmm0
movaps xmm1, xmm2
minps xmm1, xmm0
xorps xmm3, xmm3
cmpunordps xmm0, xmm3
blendvps xmm1, xmm2, xmm0
movaps xmm3, xmm1
shufps xmm3, xmm1, 2
movaps xmm2, xmm3
minss xmm2, xmm1
cmpunordss xmm1, xmm1
movaps xmm0, xmm1
blendvps xmm2, xmm3, xmm0
movaps xmm0, xmm2
ret
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy0V02PozgQ_TXOxeoIypDAgUOns7nNHFYjjbQXZMAJnsU2wobpmV-_MoaE8LG9H5pWK2Dnufzq2X7lUK35TTKWoPCEwvOOtqZUTfKVf-fVH7tMFT8SRN5KY2qNyCuCC4LLTRWZqsxeNTcEl58ILt3X47f4C5MMkd-Qd0beq_v8Qv9k2JQMXwm8B7hjuVENphpTidk7FXXFELxNRyCANM1aXhku04YVbc5SwSUCwK1mGiOAoUllYVu5qFupmsJ2calN0-aGK6kxwYYLph_xD577z-va9bSay9tArU__dJOtzZK8Oqap5j8Zgsg_IIhReEbhGSNyxtdKUYPIyYXpW_j3nusnyy0askUQY3Q8YQfDDTNtI_F6epFFIzKC0fE8Yz1rUi1cz9O8PRPHHYWnwFK2UQdt8fAnVEdrjfG7ED6CN_v0NhEwQwjV6bJo6xH6LgRZDzKujNZ3pLcMNs4TrAehsrCAKYLMuHKp9TMR_xmhy_Y6y7d_Qhgu0i6rWk_ThgFBZSGHGN46D9X0RKeIYFNVMgu_ncsvElXOEGRN9klw-B_JwmxZJlnOd9d_ynJDqkmWG3JPNpe3vnVW8pwp1TCzelKz1mBucE4lzhh2R73ARmFwvrTfciV3iD_9mB7sNBU-RJgOjvJgkCupDR6-7vzenVIhUqE6Zk9pWmsEER28BTv_tiEoCk-k94c3PH_35--jGdkZHyFGCAy2aOf-TD9jRC73kBiRfr17Eg76PGNsm2OXtxHJX0byFnzjJ5pPwgguD8Fdm6xisuicMr1WXLpG57uIjpRrWcC4G-9ijqCJYS8XA-4TWv-5VswN77nYEPcXWK7OKMoDN2i3vRJz-JqW01ATPadDFjN-oCuBv9VV97rCJF-nbd_zLK5eU2cp8lhF7dDOaJ1eCbiBZBDy31bQ56P2z2vovAxueNlKtV26_mhDG_X4ffChiZWNNjTqVy-tcuZUbn1G3IzQh-a6XVPJU01dTX4yxXb1npeJ7VrgryMnrDfsfEWCB6-PVm-jFC4LwK5ISBGTmO5Y4h8hCLwwDsNdmRQHYEXus2OcHeKMsSg-HGIaZPkxgDD2gx1PwIPAi4B4MYn9cB9HWViEcVTEfuhfIUCBxwTl1b6qOmEv4DuudcsS3wsj77iraMYq3d_lAfKKyhsCe4p3TWIHvGTtTaPAq7g2-hHCcFOx5M3Bj_0de-sOTqubargpBb6qBk_PisaSsUJjLupGdUwwafD3kkmsmTH2no0AXoTWLNj7CGC_a5sqmf2u4KZss32uBIKLZTc8XupGfWO5QXDps9UILkPCXQJ_BQAA__8h04Y6">