[llvm-bugs] [Bug 50364] New: rob pike endianness trick not getting optimized anymore
via llvm-bugs
llvm-bugs at lists.llvm.org
Sun May 16 06:24:21 PDT 2021
https://bugs.llvm.org/show_bug.cgi?id=50364
Bug ID: 50364
Summary: rob pike endianness trick not getting optimized anymore
Product: clang
Version: 12.0
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: LLVM Codegen
Assignee: unassignedclangbugs at nondot.org
Reporter: soap at gentoo.org
CC: llvm-bugs at lists.llvm.org, neeilans at live.com, richard-llvm at metafoo.co.uk
I have the following repo with Rob Pike's endianness tricks:
https://github.com/SoapGentoo/portable-endianness
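It seems Clang 12 regressed the code generation for the 64-bit store routines: the byte-by-byte stores are no longer merged into a single 64-bit store.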
Clang 11.0.1:
store64_to_LE(unsigned long, unsigned char*): # @store64_to_LE(unsigned long, unsigned char*)
mov qword ptr [rsi], rdi
ret
store64_to_BE(unsigned long, unsigned char*): # @store64_to_BE(unsigned long, unsigned char*)
movbe qword ptr [rsi], rdi
ret
Clang 12.0.0:
.LCPI8_0:
.quad 8 # 0x8
.quad 16 # 0x10
.quad 24 # 0x18
.quad 32 # 0x20
.LCPI8_1:
.byte 0 # 0x0
.byte 8 # 0x8
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
store64_to_LE(unsigned long, unsigned char*): # @store64_to_LE(unsigned long, unsigned char*)
mov byte ptr [rsi], dil
vmovq xmm0, rdi
vpbroadcastq ymm0, xmm0
vpsrlvq ymm0, ymm0, ymmword ptr [rip + .LCPI8_0]
vextracti128 xmm1, ymm0, 1
vmovdqa xmm2, xmmword ptr [rip + .LCPI8_1] # xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
vpshufb xmm1, xmm1, xmm2
vpshufb xmm0, xmm0, xmm2
vpunpcklwd xmm0, xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
vmovd dword ptr [rsi + 1], xmm0
mov rax, rdi
shr rax, 40
mov byte ptr [rsi + 5], al
mov rax, rdi
shr rax, 48
mov byte ptr [rsi + 6], al
shr rdi, 56
mov byte ptr [rsi + 7], dil
vzeroupper
ret
.LCPI11_0:
.quad 56 # 0x38
.quad 48 # 0x30
.quad 40 # 0x28
.quad 32 # 0x20
.LCPI11_1:
.byte 0 # 0x0
.byte 8 # 0x8
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
.zero 1
store64_to_BE(unsigned long, unsigned char*): # @store64_to_BE(unsigned long, unsigned char*)
mov rax, rdi
vmovq xmm0, rdi
vpbroadcastq ymm0, xmm0
vpsrlvq ymm0, ymm0, ymmword ptr [rip + .LCPI11_0]
vextracti128 xmm1, ymm0, 1
vmovdqa xmm2, xmmword ptr [rip + .LCPI11_1] # xmm2 = <0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
vpshufb xmm1, xmm1, xmm2
vpshufb xmm0, xmm0, xmm2
vpunpcklwd xmm0, xmm0, xmm1 # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
vmovd dword ptr [rsi], xmm0
mov rcx, rdi
shr rcx, 24
mov byte ptr [rsi + 4], cl
mov rcx, rdi
shr rcx, 16
mov byte ptr [rsi + 5], cl
mov byte ptr [rsi + 6], ah
mov byte ptr [rsi + 7], al
vzeroupper
ret
I consider detecting this sort of pattern very important in order to write
performant, portable code.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210516/2e549530/attachment.html>
More information about the llvm-bugs mailing list.