[llvm-bugs] [Bug 37549] New: SSE4.1 blend formation logic is not flexible
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue May 22 00:56:37 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=37549
Bug ID: 37549
Summary: SSE4.1 blend formation logic is not flexible
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: lebedev.ri at gmail.com
CC: llvm-bugs at lists.llvm.org
https://godbolt.org/g/iHXjfc
#include <smmintrin.h>
/* Scalar reference version of the operation.
 * If any of in[0]..in[3] compares >= thr, all four elements of in are
 * overwritten with the corresponding elements of replace; otherwise the
 * values stored in in[] are left as they are. */
void scalar(float* in, float thr, float* replace) {
if(in[0] >= thr || in[1] >= thr || in[2] >= thr || in[3] >= thr)
{
for(int c = 0; c < 4; c++)
{
in[c] = replace[c];
}
} else {
for(int c = 0; c < 4; c++)
{
/* Self-assignment: stores back the value just read, so in[c] keeps its value. */
in[c] = in[c];
}
}
}
/* All-or-nothing select written with and/andnot/or instead of a blend.
 * If any lane of in compares >= the matching lane of thr, every lane of the
 * result comes from replace; otherwise every lane comes from in. */
__m128 notblend(__m128 in, __m128 thr, __m128 replace) {
/* Per-lane compare: a lane is all-ones where in >= thr, all-zeros otherwise. */
__m128 isoe = _mm_cmpge_ps(in, thr);
/* Horizontal OR, step 1: with mask lanes m0..m3 this yields
 * [m0|m2, m0|m2, m1|m3, m1|m3]. */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
/* Horizontal OR, step 2: every lane now holds m0|m1|m2|m3. */
isoe = _mm_or_ps(_mm_unpacklo_ps(isoe, isoe), _mm_unpackhi_ps(isoe, isoe));
/* Bitwise select: (~isoe & in) | (isoe & replace). */
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, in), _mm_and_ps(isoe,
replace));
return result;
}
/* Per-lane select written with and/andnot/or instead of a blend.
 * Each result lane is taken from replace where in >= thr in that lane,
 * and from in otherwise. */
__m128 almostblend(__m128 in, __m128 thr, __m128 replace) {
/* Per-lane compare: a lane is all-ones where in >= thr, all-zeros otherwise. */
__m128 isoe = _mm_cmpge_ps(in, thr);
/* Bitwise select: (~isoe & in) | (isoe & replace). */
__m128 result = _mm_or_ps(_mm_andnot_ps(isoe, in), _mm_and_ps(isoe,
replace));
return result;
}
/* Per-lane select using the SSE4.1 variable blend intrinsic directly.
 * Each result lane is taken from replace where in >= thr in that lane,
 * and from in otherwise. */
__m128 blend(__m128 in, __m128 thr, __m128 replace) {
/* Per-lane compare: a lane is all-ones where in >= thr, all-zeros otherwise. */
__m128 isoe = _mm_cmpge_ps(in, thr);
/* _mm_blendv_ps picks the second operand (replace) in lanes where the mask's
 * sign bit is set, and the first operand (in) elsewhere. */
__m128 result = _mm_blendv_ps(in, replace, isoe);
return result;
}
The last two are codegen'ed as vblendvps; the first two aren't.
The first one, scalar(), is likely a vectorizer failure.
The second example, notblend(), is of the most interest here.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180522/a4d433eb/attachment.html>
More information about the llvm-bugs
mailing list