<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Issue</th>
<td>
<a href="https://github.com/llvm/llvm-project/issues/55098">55098</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>
std::clamp loop performance
</td>
</tr>
<tr>
<th>Labels</th>
<td>
llvm:codegen,
performance,
llvm:optimizations
</td>
</tr>
<tr>
<th>Assignees</th>
<td>
</td>
</tr>
<tr>
<th>Reporter</th>
<td>
davemgreen
</td>
</tr>
</table>
<pre>
I think this is a slight simplification of std::clamp from libcxx, called in a loop:
https://godbolt.org/z/4bqq8azo3
```
template <class _T1, class _T2 = _T1>
struct __less
{
bool operator()(const _T1& __x, const _T1& __y) const {return __x < __y;}
bool operator()(const _T1& __x, const _T2& __y) const {return __x < __y;}
bool operator()(const _T2& __x, const _T1& __y) const {return __x < __y;}
bool operator()(const _T2& __x, const _T2& __y) const {return __x < __y;}
};
template <class _T1>
struct __less<_T1, _T1>
{
bool operator()(const _T1& __x, const _T1& __y) const {return __x < __y;}
};
template<class _Tp, class _Compare>
inline const _Tp&
clamp(const _Tp& __v, const _Tp& __lo, const _Tp& __hi, _Compare __comp)
{
return __comp(__v, __lo) ? __lo : __comp(__hi, __v) ? __hi : __v;
}
template<class _Tp>
inline const _Tp&
clamp(const _Tp& __v, const _Tp& __lo, const _Tp& __hi)
{
return clamp(__v, __lo, __hi, __less<_Tp>());
}
void foo(int n, float *y, float *v)
{
for(int i = 0; i < n; i++)
y[i] = clamp(v[i], 0.25f, 0.75f);
}
```
The loop still contains the stores of the floating-point clamp bounds to local stack allocas, and has a load through a selected pointer. As a result, it doesn't get vectorized:
```
10: ; preds = %7, %10
%11 = phi i64 [ 0, %7 ], [ %20, %10 ]
%12 = getelementptr inbounds float, ptr %2, i64 %11
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %4)
store float 2.500000e-01, ptr %4, align 4, !tbaa !8
call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %5)
store float 7.500000e-01, ptr %5, align 4, !tbaa !8
%13 = load float, ptr %12, align 4, !tbaa !8
%14 = fcmp fast olt float %13, 2.500000e-01
%15 = fcmp fast ogt float %13, 7.500000e-01
%16 = select i1 %15, ptr %5, ptr %12
%17 = select i1 %14, ptr %4, ptr %16
%18 = load float, ptr %17, align 4, !tbaa !8
%19 = getelementptr inbounds float, ptr %1, i64 %11
store float %18, ptr %19, align 4, !tbaa !8
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %5)
call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %4)
%20 = add nuw nsw i64 %11, 1
%21 = icmp eq i64 %20, %8
br i1 %21, label %9, label %10, !llvm.loop !12
```
A simplification that avoids references does better and gets vectorized, but still contains this compare-and-select sequence alongside the llvm.minnum call:
https://godbolt.org/z/cP9zWz7o5
```
%42 = fcmp fast olt float %41, 2.500000e-01
%43 = tail call fast float @llvm.minnum.f32(float %41, float 7.500000e-01)
%44 = select fast i1 %42, float 2.500000e-01, float %43
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy9V0tzozgQ_jX4ohoXCDBw8MFxdqrmtoep2mNKgLC1IxCRhDPxr99WC9vYIZnMZjcUxnp0f_1UN5Sqfl5_I3Yvuh_uaQjcjBgpdntLjGh7KRpRMStUR1RDjK2DeAN3JVnbk0arlkhRVj9_BnRLKiYlr4noAEIq1TvS8D4IN3tre-Nm9CvcO1WXStql0juYHeGXlI-POTuq2JMHq3C8_RSfloMyzHISxFuQbgx5-B6h1HFCYeceF-M_PIuxeqgseXiQ3JgRK7vzAwJXqZQkqueaWaUDmge0gGelOmM9-Ap4vWXXa89AOa4BoOZ20J0jdbrhdnwXZPcfEUT_D0H0syyaE_QvLHKD-O7XSTAf73g7JsiU5PPj_9KIa1MmlvSTdN6qtmeanxUXnRQdPysCpCu_gSdxonbvVTxM1R7XpJpZ3Av00SgPFirl8IoZf50N9ST5KMUDF2D8VxzDYDMlGiU44hPRXoxEh4tvzg573T2f4ow3LD_BXxm-vTjxknio65hRxWs2HpSoSaMAIxedJZ3DaKRikFV083w1O8zr1WDeOmaB1S8EUTjcApobBvQO7-LC467nIL0TQXqPTCerDuOiExwuadr4QeYGczbMVenve46lHzqFkNK51zLRGegtHJaU5sa1ETdD00S3I71y-vuGcmByABKrAAS6CbCw6geBtgIz49RhXU32zGCDYTUAaTXs9sRwySsLvQfBuF6Sb5bUihvwaWbJjltyAAKlxRGIkB9UGaQ9t6gba6LQ5efvXc7hvea1Qa8GNM2cxvAfhSfvu0mE2z0cAbFKCPgcoubpMjJ63y3CnIZnANyZgPhmB3aB4S3vbG819N1SDR2IR9c6VrfqcNwYhTnxJxjXrQmmYJCEUh7aJfR6bkXLl-B2bZd96HIL2JITVqe6bgAuwEkmOYWBHXOVLtPQXfxLGE1UQAgGrxWdRwtoZEvG3H_-3yiUvqJQNq9Q-g6FnLtidDQm261bI_pOjAQxmsq9MTEoN_Dycz7ZIMGxXrltwpresu5esGavsK6Q1Z8MIiIPd-uBiykTzmyGM7kN5olzNeXM3_BW9k5vFb-T2tFsak_jj2pNOYqPpSLv6vcn4kdwpicMiwH6hdU16YYn0pmnid3APY099TVGuLThjye6czk5m1nqMbwUISQrOYourmbRyBd5A1x1h8kpZWbbwOb22-FJ2L0aLBmMK_maN1zzroJS74o0KbmFon2q7xB6MynXbrkE1hcdRZh3f2BUfxbHv46ZSmd1Rp8l9K1TmkRvnNLEFwnQS_qII__IO8a9FRDYdtnEUDTyG9i5QjUNfZJMDySC-7Al9MJ-W3gvMm6_qhb1Oq6LuGALK6zk65uPOgwwvBTDu0XLIEaLQcv1jYMhmkO5hDc8mDjzxr8vvVZ_g44wFcZAJ4dBmoZFvtiv07hIV00Y5zxMsjoHoyroeFFeZzWjRcIXmG9mDb0voBRBQSNV8x2HJk59CtKJXpfFkVj1cLDEERPOuN30fiHWNKQ0TOgqLGiUrpbRKuZlEeerpsmrvKghPryFwC0xSpAxC71GU8phZ1zwhLHmsglvolA4OEc1AZ8NkNd6XbMDb3ea826Bhq_R6n8AJotbnw">