[llvm-bugs] [Bug 41248] New: Use paddusb for saturated add/sub
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Mar 26 14:23:36 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=41248
Bug ID: 41248
Summary: Use paddusb for saturated add/sub
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: david.bolvansky at gmail.com
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
llvm-dev at redking.me.uk, spatel+llvm at rotateright.com
Code from other PR:
void add_clamp_2(unsigned char *ptr) {
for (unsigned i = 0; i != 16; ++i) {
unsigned char s = *ptr;
*ptr++ = (s + HI) > 255 ? 255 : (s + HI);
}
}
void add_clamp_3(unsigned char *ptr) {
for (unsigned i = 0; i != 16; ++i) {
unsigned char s = *ptr;
*ptr++ = (s + HI) >= 255 ? 255 : (s + HI);
}
}
add_clamp_2(unsigned char*): # @add_clamp_2(unsigned char*)
movdqu xmm0, xmmword ptr [rdi]
pminub xmm0, xmmword ptr [rip + .LCPI2_0]
paddb xmm0, xmmword ptr [rip + .LCPI2_1]
movdqu xmmword ptr [rdi], xmm0
ret
add_clamp_3(unsigned char*): # @add_clamp_3(unsigned char*)
movdqu xmm0, xmmword ptr [rdi]
pminub xmm0, xmmword ptr [rip + .LCPI3_0]
paddb xmm0, xmmword ptr [rip + .LCPI3_1]
movdqu xmmword ptr [rdi], xmm0
ret
ICC 19 uses paddusb here:
add_clamp_2(unsigned char*):
movdqu xmm0, XMMWORD PTR [rdi] #22.29
paddusb xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip] #22.29
movdqu XMMWORD PTR [rdi], xmm0 #22.10
ret #24.1
add_clamp_3(unsigned char*):
movdqu xmm0, XMMWORD PTR [rdi] #29.30
paddusb xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip] #29.30
movdqu XMMWORD PTR [rdi], xmm0 #29.10
ret
Maybe LLVM could use paddusb here as well?
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190326/5b46146c/attachment.html>
More information about the llvm-bugs
mailing list