[llvm-bugs] [Bug 45415] New: [VectorCombine?] Inversed <4 x i32> + @llvm.bswap.v4i32 = Inversed <16 x i8>
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri Apr 3 02:38:38 PDT 2020
https://bugs.llvm.org/show_bug.cgi?id=45415
Bug ID: 45415
Summary: [VectorCombine?] Inversed <4 x i32> +
@llvm.bswap.v4i32 = Inversed <16 x i8>
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: lebedev.ri at gmail.com
CC: llvm-bugs at lists.llvm.org
I was initially checking whether this pattern gets vectorized at all. It does,
but the resulting IR still //seems// slightly suboptimal:
https://godbolt.org/z/CXk4bt
#include <cstdint>
#include <cstring>
#include <array>
void test(char* __restrict__ in, char* __restrict__ out) {
for(int i = 0; i != 16; i += 4) {
uint32_t tmp;
memcpy(&tmp, in + (16-4) - i, sizeof(uint32_t));
tmp = __builtin_bswap32(tmp);
memcpy(out + i, &tmp, sizeof(uint32_t));
}
}
This currently results in the following IR:
; Function Attrs: nofree nounwind uwtable
define dso_local void @_Z4testPcS_(i8* noalias nocapture readonly %in, i8*
noalias nocapture %out) local_unnamed_addr #0 {
entry:
%0 = bitcast i8* %in to <4 x i32>*
%1 = load <4 x i32>, <4 x i32>* %0, align 1
%reorder_shuffle = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32>
<i32 3, i32 2, i32 1, i32 0>
%2 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %reorder_shuffle)
%3 = bitcast i8* %out to <4 x i32>*
store <4 x i32> %2, <4 x i32>* %3, align 1
ret void
}
which is lowered to the following assembly:
.LCPI0_0:
.byte 15 # 0xf
.byte 14 # 0xe
.byte 13 # 0xd
.byte 12 # 0xc
.byte 11 # 0xb
.byte 10 # 0xa
.byte 9 # 0x9
.byte 8 # 0x8
.byte 7 # 0x7
.byte 6 # 0x6
.byte 5 # 0x5
.byte 4 # 0x4
.byte 3 # 0x3
.byte 2 # 0x2
.byte 1 # 0x1
.byte 0 # 0x0
test(char*, char*): # @test(char*, char*)
vmovdqu xmm0, xmmword ptr [rdi]
vpshufb xmm0, xmm0, xmmword ptr [rip + .LCPI0_0] # xmm0 =
xmm0[15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
vmovdqu xmmword ptr [rsi], xmm0
ret
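The backend already folds the element reversal and the bswap (itself a
per-element byte shuffle) into a single byte shuffle, so we could combine
those two shuffles in IR too: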
; Function Attrs: nofree nounwind uwtable
define dso_local void @_Z4testPcS_(i8* noalias nocapture readonly %in, i8*
noalias nocapture %out) local_unnamed_addr #0 {
entry:
%t0 = bitcast i8* %in to <16 x i8>*
%t1 = bitcast i8* %out to <16 x i8>*
%t2 = load <16 x i8>, <16 x i8>* %t0, align 1
%t3 = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32>
<i32 15, i32 14, i32 13, i32 12,
i32 11, i32 10, i32 9, i32 8,
i32 7, i32 6, i32 5, i32 4,
i32 3, i32 2, i32 1, i32 0>
store <16 x i8> %t3, <16 x i8>* %t1, align 1
ret void
}
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200403/affda705/attachment-0001.html>
More information about the llvm-bugs
mailing list