[llvm-bugs] [Bug 51068] New: [X86][SSE] Failure to merge scalar float comparison results
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Jul 12 12:04:06 PDT 2021
https://bugs.llvm.org/show_bug.cgi?id=51068
Bug ID: 51068
Summary: [X86][SSE] Failure to merge scalar float comparison results
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
llvm-dev at redking.me.uk, pengfei.wang at intel.com,
spatel+llvm at rotateright.com
https://simd.godbolt.org/z/vWb57hqzx
#include <x86intrin.h>
bool cmp2_eq(float x, float y, float z, float w) {
return (x == y) == (z == w);
}
define zeroext i1 @cmp2_eq(float %0, float %1, float %2, float %3) {
%5 = fcmp une float %0, %1
%6 = fcmp oeq float %2, %3
%7 = xor i1 %5, %6
ret i1 %7
}
cmp2:
ucomiss %xmm1, %xmm0
setp %al
setne %cl
orb %al, %cl
ucomiss %xmm3, %xmm2
setnp %dl
sete %al
andb %dl, %al
xorb %cl, %al
retq
We should be able to use cmpss (or cmpsd for doubles) to perform both
comparisons and merge the results in the SSE registers, reducing fpu->gpr
traffic and branch density.
Compare to vectors:
__v4si cmp2_eq(__v4sf x, __v4sf y, __v4sf z, __v4sf w) {
return (x == y) == (z == w);
}
define <4 x i32> @cmp2_(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x
float> %3) {
%5 = fcmp une <4 x float> %0, %1
%6 = fcmp oeq <4 x float> %2, %3
%7 = xor <4 x i1> %6, %5
%8 = sext <4 x i1> %7 to <4 x i32>
ret <4 x i32> %8
}
cmp2_:
cmpneqps %xmm1, %xmm0
cmpeqps %xmm3, %xmm2
xorps %xmm2, %xmm0
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210712/b00f85cf/attachment.html>
More information about the llvm-bugs
mailing list