[llvm-bugs] [Bug 41248] New: Use paddusb for saturated add/sub

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Mar 26 14:23:36 PDT 2019


https://bugs.llvm.org/show_bug.cgi?id=41248

            Bug ID: 41248
           Summary: Use paddusb for saturated add/sub
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
                    llvm-dev at redking.me.uk, spatel+llvm at rotateright.com

Code from another PR (`HI` is defined outside this snippet):

void add_clamp_2(unsigned char *ptr) {
    for (unsigned i = 0; i != 16; ++i) {
        unsigned char s = *ptr;
        *ptr++ = (s + HI) > 255 ? 255 : (s + HI);
    }
}

void add_clamp_3(unsigned char *ptr) {
    for (unsigned i = 0; i != 16; ++i) {
        unsigned char s = *ptr;
        *ptr++ = (s + HI) >= 255 ? 255 : (s + HI);
    }
}

add_clamp_2(unsigned char*):                      # @add_clamp_2(unsigned char*)
        movdqu  xmm0, xmmword ptr [rdi]
        pminub  xmm0, xmmword ptr [rip + .LCPI2_0]
        paddb   xmm0, xmmword ptr [rip + .LCPI2_1]
        movdqu  xmmword ptr [rdi], xmm0
        ret

add_clamp_3(unsigned char*):                      # @add_clamp_3(unsigned char*)
        movdqu  xmm0, xmmword ptr [rdi]
        pminub  xmm0, xmmword ptr [rip + .LCPI3_0]
        paddb   xmm0, xmmword ptr [rip + .LCPI3_1]
        movdqu  xmmword ptr [rdi], xmm0
        ret

ICC 19 uses paddusb here:
add_clamp_2(unsigned char*):
        movdqu    xmm0, XMMWORD PTR [rdi]                       #22.29
        paddusb   xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip]   #22.29
        movdqu    XMMWORD PTR [rdi], xmm0                       #22.10
        ret                                                     #24.1
add_clamp_3(unsigned char*):
        movdqu    xmm0, XMMWORD PTR [rdi]                       #29.30
        paddusb   xmm0, XMMWORD PTR .L_2il0floatpacket.1[rip]   #29.30
        movdqu    XMMWORD PTR [rdi], xmm0                       #29.10
        ret   

Could LLVM use paddusb here as well?

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190326/5b46146c/attachment.html>


More information about the llvm-bugs mailing list