[llvm-bugs] [Bug 46968] New: Missed SLP vectorization with umin/umax
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Aug 3 07:48:28 PDT 2020
https://bugs.llvm.org/show_bug.cgi?id=46968
Bug ID: 46968
Summary: Missed SLP vectorization with umin/umax
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Common Code Generator Code
Assignee: unassignedbugs at nondot.org
Reporter: david.bolvansky at gmail.com
CC: llvm-bugs at lists.llvm.org
The following hot code (LightPixel, from Firefox) is not fully SLP-vectorized;
the rasterflood-svg benchmark could be faster if it were:
#include<stdint.h>
#include<stddef.h>
/* Byte offsets of the B, G, R and A components within the 4-element
 * components[] array that foo() reads and writes. */
const ptrdiff_t B8G8R8A8_COMPONENT_BYTEOFFSET_B = 0;
const ptrdiff_t B8G8R8A8_COMPONENT_BYTEOFFSET_G = 1;
const ptrdiff_t B8G8R8A8_COMPONENT_BYTEOFFSET_R = 2;
const ptrdiff_t B8G8R8A8_COMPONENT_BYTEOFFSET_A = 3;
/* Not referenced by the code shown in this report. */
static const int sInputIntPrecisionBits = 15;
/* Right-shift amount applied to each scaled component in foo(). */
static const int sOutputIntPrecisionBits = 15;
/* Not referenced by the code shown in this report. */
static const int sCacheIndexPrecisionBits = 7;
/* Returns the larger of the two unsigned values (b when they are equal).
 * Kept as a plain compare-and-select: this is the umax pattern the report
 * is about. */
static inline unsigned umax(unsigned a, unsigned b) {
return a > b ? a : b;
}
/* Returns the smaller of the two unsigned values (a when they are equal).
 * Kept as a plain compare-and-select: this is the umin pattern the report
 * is about. */
static inline unsigned umin(unsigned a, unsigned b) {
return a > b ? b : a;
}
/*
 * Reproducer for the missed vectorization described in this report.
 *
 * components  : 4-byte array indexed by the B8G8R8A8_COMPONENT_BYTEOFFSET_*
 *               constants; updated in place.
 * specularNHi : multiplier applied to the B, G and R bytes.
 * aColor      : unused in this function.
 *
 * B, G and R are each replaced by
 *   umin((specularNHi * byte) >> sOutputIntPrecisionBits, 255).
 * A is then set to the maximum of the three bytes just stored.
 */
void foo(uint8_t components[4], uint32_t specularNHi, uint32_t aColor) {
/* B, G, R: scale, shift, clamp to 255. The three statements have the same
 * shape and differ only in the byte offset. */
components[B8G8R8A8_COMPONENT_BYTEOFFSET_B] =
umin((specularNHi * components[B8G8R8A8_COMPONENT_BYTEOFFSET_B]) >>
sOutputIntPrecisionBits,
255U);
components[B8G8R8A8_COMPONENT_BYTEOFFSET_G] =
umin((specularNHi *components[B8G8R8A8_COMPONENT_BYTEOFFSET_G]) >>
sOutputIntPrecisionBits,
255U);
components[B8G8R8A8_COMPONENT_BYTEOFFSET_R] =
umin((specularNHi * components[B8G8R8A8_COMPONENT_BYTEOFFSET_R]) >>
sOutputIntPrecisionBits,
255U);
/* A: reads back the clamped B, G, R values stored above (each is at most
 * 255, so the uint8_t stores lost nothing) and keeps the largest. */
components[B8G8R8A8_COMPONENT_BYTEOFFSET_A] =
umax(components[B8G8R8A8_COMPONENT_BYTEOFFSET_B],
umax(components[B8G8R8A8_COMPONENT_BYTEOFFSET_G],
components[B8G8R8A8_COMPONENT_BYTEOFFSET_R]));
}
With -O3 -mavx2 the code stays fully scalar. The SLP vectorizer remark is:
"List vectorization was possible but not beneficial with cost 0 >= 0".
Generated code:
foo(unsigned char*, unsigned int, unsigned int): # @foo(unsigned char*, unsigned int, unsigned int)
movzx eax, byte ptr [rdi]
imul eax, esi
shr eax, 15
cmp eax, 255
mov r8d, 255
cmovae eax, r8d
mov byte ptr [rdi], al
movzx edx, byte ptr [rdi + 1]
imul edx, esi
shr edx, 15
cmp edx, 255
cmovae edx, r8d
mov byte ptr [rdi + 1], dl
movzx ecx, byte ptr [rdi + 2]
imul ecx, esi
shr ecx, 15
cmp ecx, 255
cmovae ecx, r8d
mov byte ptr [rdi + 2], cl
cmp edx, ecx
cmova ecx, edx
cmp eax, ecx
cmova ecx, eax
mov byte ptr [rdi + 3], cl
ret
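With -O3 -mavx512f, Clang vectorizes it only partially: the G and R channels
use vector instructions, while the B channel and the final umax reduction
stay scalar: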
.LCPI0_0:
.long 255 # 0xff
foo(unsigned char*, unsigned int, unsigned int): # @foo(unsigned char*, unsigned int, unsigned int)
movzx eax, byte ptr [rdi]
imul eax, esi
shr eax, 15
cmp eax, 255
mov ecx, 255
cmovb ecx, eax
mov byte ptr [rdi], cl
movzx eax, word ptr [rdi + 1]
vmovd xmm0, eax
vpmovzxbd xmm0, xmm0 # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
vmovd xmm1, esi
vpbroadcastd xmm1, xmm1
vpmulld xmm0, xmm1, xmm0
vpbroadcastd xmm1, dword ptr [rip + .LCPI0_0] # xmm1 = [255,255,255,255]
vpsrld xmm0, xmm0, 15
vpminud xmm0, xmm0, xmm1
vmovd eax, xmm0
mov byte ptr [rdi + 1], al
vpextrd edx, xmm0, 1
mov byte ptr [rdi + 2], dl
cmp eax, edx
cmova edx, eax
cmp ecx, edx
cmova edx, ecx
mov byte ptr [rdi + 3], dl
ret
Godbolt: -Rpass-missed=vec*
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200803/26f75423/attachment.html>
More information about the llvm-bugs mailing list