[llvm-bugs] [Bug 36091] New: [X86][SSE] Failure to vectorize load+extend v8i8 to v8i16
via llvm-bugs
llvm-bugs at lists.llvm.org
Thu Jan 25 09:08:56 PST 2018
https://bugs.llvm.org/show_bug.cgi?id=36091
Bug ID: 36091
Summary: [X86][SSE] Failure to vectorize load+extend v8i8 to v8i16
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
niravd at google.com, spatel+llvm at rotateright.com
#include <stdint.h>
#include <x86intrin.h>
/*
 * Reproducer (signed case): build a vector of eight 16-bit lanes from the
 * eight consecutive int8_t values data[0]..data[7].
 *
 * _mm_set_epi16 takes its arguments from the highest lane down to the lowest,
 * so data[0] ends up in lane 0 and data[7] in lane 7. Each int8_t argument is
 * implicitly converted to the intrinsic's 16-bit parameter type, i.e. it is
 * sign-extended.
 */
__m128i loadext_i8(int8_t *data) {
return _mm_set_epi16(data[7], data[6], data[5], data[4], data[3], data[2],
data[1], data[0]);
}
/*
 * Reproducer (unsigned case): build a vector of eight 16-bit lanes from the
 * eight consecutive uint8_t values data[0]..data[7].
 *
 * _mm_set_epi16 takes its arguments from the highest lane down to the lowest,
 * so data[0] ends up in lane 0 and data[7] in lane 7. Each uint8_t argument
 * is implicitly converted to the intrinsic's 16-bit parameter type, i.e. it
 * is zero-extended.
 */
__m128i loadext_u8(uint8_t *data) {
return _mm_set_epi16(data[7], data[6], data[5], data[4], data[3], data[2],
data[1], data[0]);
}
Compiled with `clang -O3 -march=btver2`, this produces the following LLVM IR and assembly:
; IR emitted for the signed reproducer loadext_i8.
; %0 is the incoming i8* (the C 'data' pointer).
; The function stays fully scalar: eight separate i8 loads, eight separate
; sext-to-i16 operations, and a chain of eight insertelement instructions.
; No single <8 x i8> load followed by one vector sext is formed, which is the
; missed vectorization the report's title describes.
define <2 x i64> @loadext_i8(i8* nocapture readonly) {
; Scalar loads of data[7] down to data[1], each sign-extended to i16.
%2 = getelementptr inbounds i8, i8* %0, i64 7
%3 = load i8, i8* %2, align 1, !tbaa !2
%4 = sext i8 %3 to i16
%5 = getelementptr inbounds i8, i8* %0, i64 6
%6 = load i8, i8* %5, align 1, !tbaa !2
%7 = sext i8 %6 to i16
%8 = getelementptr inbounds i8, i8* %0, i64 5
%9 = load i8, i8* %8, align 1, !tbaa !2
%10 = sext i8 %9 to i16
%11 = getelementptr inbounds i8, i8* %0, i64 4
%12 = load i8, i8* %11, align 1, !tbaa !2
%13 = sext i8 %12 to i16
%14 = getelementptr inbounds i8, i8* %0, i64 3
%15 = load i8, i8* %14, align 1, !tbaa !2
%16 = sext i8 %15 to i16
%17 = getelementptr inbounds i8, i8* %0, i64 2
%18 = load i8, i8* %17, align 1, !tbaa !2
%19 = sext i8 %18 to i16
%20 = getelementptr inbounds i8, i8* %0, i64 1
%21 = load i8, i8* %20, align 1, !tbaa !2
%22 = sext i8 %21 to i16
; data[0] is loaded directly through %0 (offset 0, so no getelementptr).
%23 = load i8, i8* %0, align 1, !tbaa !2
%24 = sext i8 %23 to i16
; Assemble the <8 x i16> result lane by lane: lane i receives data[i].
%25 = insertelement <8 x i16> undef, i16 %24, i32 0
%26 = insertelement <8 x i16> %25, i16 %22, i32 1
%27 = insertelement <8 x i16> %26, i16 %19, i32 2
%28 = insertelement <8 x i16> %27, i16 %16, i32 3
%29 = insertelement <8 x i16> %28, i16 %13, i32 4
%30 = insertelement <8 x i16> %29, i16 %10, i32 5
%31 = insertelement <8 x i16> %30, i16 %7, i32 6
%32 = insertelement <8 x i16> %31, i16 %4, i32 7
; Reinterpret the eight i16 lanes as <2 x i64>, the function's return type.
%33 = bitcast <8 x i16> %32 to <2 x i64>
ret <2 x i64> %33
}
; IR emitted for the unsigned reproducer loadext_u8.
; %0 is the incoming i8* (the C 'data' pointer).
; Same shape as the signed variant, with zext in place of sext: eight separate
; i8 loads, eight separate zext-to-i16 operations, and a chain of eight
; insertelement instructions. No single <8 x i8> load followed by one vector
; zext is formed, which is the missed vectorization the report's title
; describes.
define <2 x i64> @loadext_u8(i8* nocapture readonly) {
; Scalar loads of data[7] down to data[1], each zero-extended to i16.
%2 = getelementptr inbounds i8, i8* %0, i64 7
%3 = load i8, i8* %2, align 1, !tbaa !2
%4 = zext i8 %3 to i16
%5 = getelementptr inbounds i8, i8* %0, i64 6
%6 = load i8, i8* %5, align 1, !tbaa !2
%7 = zext i8 %6 to i16
%8 = getelementptr inbounds i8, i8* %0, i64 5
%9 = load i8, i8* %8, align 1, !tbaa !2
%10 = zext i8 %9 to i16
%11 = getelementptr inbounds i8, i8* %0, i64 4
%12 = load i8, i8* %11, align 1, !tbaa !2
%13 = zext i8 %12 to i16
%14 = getelementptr inbounds i8, i8* %0, i64 3
%15 = load i8, i8* %14, align 1, !tbaa !2
%16 = zext i8 %15 to i16
%17 = getelementptr inbounds i8, i8* %0, i64 2
%18 = load i8, i8* %17, align 1, !tbaa !2
%19 = zext i8 %18 to i16
%20 = getelementptr inbounds i8, i8* %0, i64 1
%21 = load i8, i8* %20, align 1, !tbaa !2
%22 = zext i8 %21 to i16
; data[0] is loaded directly through %0 (offset 0, so no getelementptr).
%23 = load i8, i8* %0, align 1, !tbaa !2
%24 = zext i8 %23 to i16
; Assemble the <8 x i16> result lane by lane: lane i receives data[i].
%25 = insertelement <8 x i16> undef, i16 %24, i32 0
%26 = insertelement <8 x i16> %25, i16 %22, i32 1
%27 = insertelement <8 x i16> %26, i16 %19, i32 2
%28 = insertelement <8 x i16> %27, i16 %16, i32 3
%29 = insertelement <8 x i16> %28, i16 %13, i32 4
%30 = insertelement <8 x i16> %29, i16 %10, i32 5
%31 = insertelement <8 x i16> %30, i16 %7, i32 6
%32 = insertelement <8 x i16> %31, i16 %4, i32 7
; Reinterpret the eight i16 lanes as <2 x i64>, the function's return type.
%33 = bitcast <8 x i16> %32 to <2 x i64>
ret <2 x i64> %33
}
# loadext_i8 -- generated code for the signed reproducer (AT&T syntax).
# In:    %rdi = data pointer (first integer argument register in the
#        SysV AMD64 convention)
# Out:   %xmm0 = eight 16-bit lanes, lane i = sign-extended data[i]
# Uses:  %eax, %ecx, %edx as scratch
#
# Every byte is loaded and sign-extended individually (movsbl) and then
# placed into its lane with a separate vpinsrw: 8 scalar loads, 1 vmovd and
# 7 vpinsrw in total.
# NOTE(review): per the report's title, a single 8-byte vector load+extend
# (presumably vpmovsxbw (%rdi), %xmm0) was the expected output -- confirm.
loadext_i8:
movsbl (%rdi), %ecx
movsbl 1(%rdi), %eax
movsbl 2(%rdi), %edx
# vmovd seeds %xmm0 with the full 32-bit %ecx, so lane 1 briefly holds the
# sign-extension bits of data[0]; the vpinsrw $1 below overwrites it.
vmovd %ecx, %xmm0
movsbl 3(%rdi), %ecx
vpinsrw $1, %eax, %xmm0, %xmm0
vpinsrw $2, %edx, %xmm0, %xmm0
movsbl 4(%rdi), %edx
vpinsrw $3, %ecx, %xmm0, %xmm0
movsbl 5(%rdi), %ecx
vpinsrw $4, %edx, %xmm0, %xmm0
movsbl 6(%rdi), %edx
vpinsrw $5, %ecx, %xmm0, %xmm0
movsbl 7(%rdi), %ecx
vpinsrw $6, %edx, %xmm0, %xmm0
vpinsrw $7, %ecx, %xmm0, %xmm0
retq
# loadext_u8 -- generated code for the unsigned reproducer (AT&T syntax).
# In:    %rdi = data pointer (first integer argument register in the
#        SysV AMD64 convention)
# Out:   %xmm0 = eight 16-bit lanes, lane i = zero-extended data[i]
# Uses:  %eax, %ecx, %edx as scratch
#
# Every byte is loaded and zero-extended individually (movzbl) and then
# placed into its lane with a separate vpinsrw: 8 scalar loads, 1 vmovd and
# 7 vpinsrw in total.
# NOTE(review): per the report's title, a single 8-byte vector load+extend
# (presumably vpmovzxbw (%rdi), %xmm0) was the expected output -- confirm.
loadext_u8:
movzbl (%rdi), %ecx
movzbl 1(%rdi), %eax
movzbl 2(%rdi), %edx
# vmovd seeds %xmm0 with data[0] in lane 0; lanes 1-7 are filled by the
# vpinsrw instructions that follow.
vmovd %ecx, %xmm0
movzbl 3(%rdi), %ecx
vpinsrw $1, %eax, %xmm0, %xmm0
vpinsrw $2, %edx, %xmm0, %xmm0
movzbl 4(%rdi), %edx
vpinsrw $3, %ecx, %xmm0, %xmm0
movzbl 5(%rdi), %ecx
vpinsrw $4, %edx, %xmm0, %xmm0
movzbl 6(%rdi), %edx
vpinsrw $5, %ecx, %xmm0, %xmm0
movzbl 7(%rdi), %ecx
vpinsrw $6, %edx, %xmm0, %xmm0
vpinsrw $7, %ecx, %xmm0, %xmm0
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180125/781247df/attachment.html>
More information about the llvm-bugs mailing list