[llvm-bugs] [Bug 42794] New: [X86] Unnecessary stack manipulation remaining from memcpy/extract_subvector
via llvm-bugs
llvm-bugs at lists.llvm.org
Sat Jul 27 10:18:54 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=42794
Bug ID: 42794
Summary: [X86] Unnecessary stack manipulation remaining from
memcpy/extract_subvector
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: bisqwit at iki.fi, craig.topper at gmail.com,
llvm-bugs at lists.llvm.org, llvm-dev at redking.me.uk,
spatel+llvm at rotateright.com
Split off from [Bug #42674]
Current codegen for 16/32/64 byte sums: https://godbolt.org/z/76g4k2
The 32-byte reduction below should have been able to remove memcpy's stack
spill/reload and use the original load directly.
The 64-byte reduction manages to do this (see Godbolt) but still leaves the rsp
stack manipulations in place.
#include <string.h>
// Reproducer: returns the sum of the 32 bytes starting at ptr, truncated to
// the width of unsigned char. The bytes are first copied into a local array
// with memcpy and then summed one at a time.
unsigned char calculate_checksum(const void* ptr)
{
unsigned char bytes[32], result = 0;
memcpy(bytes, ptr, 32); // Endianness does not matter: every byte is added, so order is irrelevant.
for(unsigned n=0; n<32; ++n) result += bytes[n]; // wraps at unsigned char width
return result;
}
# Compiler output for calculate_checksum (Intel syntax), quoted as evidence.
# The pointer argument is read from rdi (presumably the SysV AMD64 first
# integer argument register -- confirm against the Godbolt link).
#
# Frame setup: save rbp, then round rsp down to a 32-byte boundary and
# reserve 64 bytes for the local copy of the input.
push rbp
mov rbp, rsp
and rsp, -32
sub rsp, 64
# Load all 32 input bytes, then store them to the aligned stack slot.
# This store and the two reloads that follow are the memcpy spill/reload
# the report says should have been removed.
vmovups ymm0, ymmword ptr [rdi]
vmovaps ymmword ptr [rsp], ymm0
# Reload the low 16 bytes and add the high 16 bytes to them, byte-wise.
vmovdqa xmm0, xmmword ptr [rsp]
vpaddb xmm0, xmm0, xmmword ptr [rsp + 16]
# Swap the two 64-bit halves and add again: each of the low 8 bytes now
# holds the wrapped sum of four input bytes.
vpshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]
vpaddb xmm0, xmm0, xmm1
# Sum of absolute differences against zero adds the 8 bytes of each 64-bit
# half horizontally.
vpxor xmm1, xmm1, xmm1
vpsadbw xmm0, xmm0, xmm1
# Take the low byte of that sum as the unsigned char result.
vpextrb eax, xmm0, 0
# Frame teardown: restore rsp and rbp.
mov rsp, rbp
pop rbp
# Clear the upper halves of the ymm registers before returning.
vzeroupper
ret
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20190727/2bc91ba4/attachment.html>
More information about the llvm-bugs
mailing list