<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - [X86][SSE] Failure to vectorize load+extend v8i8 to v8i16"
href="https://bugs.llvm.org/show_bug.cgi?id=36091">36091</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>[X86][SSE] Failure to vectorize load+extend v8i8 to v8i16
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Windows NT
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>llvm-dev@redking.me.uk
</td>
</tr>
<tr>
<th>CC</th>
<td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, niravd@google.com, spatel+llvm@rotateright.com
</td>
</tr></table>
<p>
<div>
<pre>#include &lt;stdint.h&gt;
#include &lt;x86intrin.h&gt;
__m128i loadext_i8(int8_t *data) {
return _mm_set_epi16(data[7], data[6], data[5], data[4], data[3], data[2],
data[1], data[0]);
}
__m128i loadext_u8(uint8_t *data) {
return _mm_set_epi16(data[7], data[6], data[5], data[4], data[3], data[2],
data[1], data[0]);
}
clang -O3 -march=btver2
define &lt;2 x i64&gt; @loadext_i8(i8* nocapture readonly) {
%2 = getelementptr inbounds i8, i8* %0, i64 7
%3 = load i8, i8* %2, align 1, !tbaa !2
%4 = sext i8 %3 to i16
%5 = getelementptr inbounds i8, i8* %0, i64 6
%6 = load i8, i8* %5, align 1, !tbaa !2
%7 = sext i8 %6 to i16
%8 = getelementptr inbounds i8, i8* %0, i64 5
%9 = load i8, i8* %8, align 1, !tbaa !2
%10 = sext i8 %9 to i16
%11 = getelementptr inbounds i8, i8* %0, i64 4
%12 = load i8, i8* %11, align 1, !tbaa !2
%13 = sext i8 %12 to i16
%14 = getelementptr inbounds i8, i8* %0, i64 3
%15 = load i8, i8* %14, align 1, !tbaa !2
%16 = sext i8 %15 to i16
%17 = getelementptr inbounds i8, i8* %0, i64 2
%18 = load i8, i8* %17, align 1, !tbaa !2
%19 = sext i8 %18 to i16
%20 = getelementptr inbounds i8, i8* %0, i64 1
%21 = load i8, i8* %20, align 1, !tbaa !2
%22 = sext i8 %21 to i16
%23 = load i8, i8* %0, align 1, !tbaa !2
%24 = sext i8 %23 to i16
%25 = insertelement &lt;8 x i16&gt; undef, i16 %24, i32 0
%26 = insertelement &lt;8 x i16&gt; %25, i16 %22, i32 1
%27 = insertelement &lt;8 x i16&gt; %26, i16 %19, i32 2
%28 = insertelement &lt;8 x i16&gt; %27, i16 %16, i32 3
%29 = insertelement &lt;8 x i16&gt; %28, i16 %13, i32 4
%30 = insertelement &lt;8 x i16&gt; %29, i16 %10, i32 5
%31 = insertelement &lt;8 x i16&gt; %30, i16 %7, i32 6
%32 = insertelement &lt;8 x i16&gt; %31, i16 %4, i32 7
%33 = bitcast &lt;8 x i16&gt; %32 to &lt;2 x i64&gt;
ret &lt;2 x i64&gt; %33
}
define &lt;2 x i64&gt; @loadext_u8(i8* nocapture readonly) {
%2 = getelementptr inbounds i8, i8* %0, i64 7
%3 = load i8, i8* %2, align 1, !tbaa !2
%4 = zext i8 %3 to i16
%5 = getelementptr inbounds i8, i8* %0, i64 6
%6 = load i8, i8* %5, align 1, !tbaa !2
%7 = zext i8 %6 to i16
%8 = getelementptr inbounds i8, i8* %0, i64 5
%9 = load i8, i8* %8, align 1, !tbaa !2
%10 = zext i8 %9 to i16
%11 = getelementptr inbounds i8, i8* %0, i64 4
%12 = load i8, i8* %11, align 1, !tbaa !2
%13 = zext i8 %12 to i16
%14 = getelementptr inbounds i8, i8* %0, i64 3
%15 = load i8, i8* %14, align 1, !tbaa !2
%16 = zext i8 %15 to i16
%17 = getelementptr inbounds i8, i8* %0, i64 2
%18 = load i8, i8* %17, align 1, !tbaa !2
%19 = zext i8 %18 to i16
%20 = getelementptr inbounds i8, i8* %0, i64 1
%21 = load i8, i8* %20, align 1, !tbaa !2
%22 = zext i8 %21 to i16
%23 = load i8, i8* %0, align 1, !tbaa !2
%24 = zext i8 %23 to i16
%25 = insertelement &lt;8 x i16&gt; undef, i16 %24, i32 0
%26 = insertelement &lt;8 x i16&gt; %25, i16 %22, i32 1
%27 = insertelement &lt;8 x i16&gt; %26, i16 %19, i32 2
%28 = insertelement &lt;8 x i16&gt; %27, i16 %16, i32 3
%29 = insertelement &lt;8 x i16&gt; %28, i16 %13, i32 4
%30 = insertelement &lt;8 x i16&gt; %29, i16 %10, i32 5
%31 = insertelement &lt;8 x i16&gt; %30, i16 %7, i32 6
%32 = insertelement &lt;8 x i16&gt; %31, i16 %4, i32 7
%33 = bitcast &lt;8 x i16&gt; %32 to &lt;2 x i64&gt;
ret &lt;2 x i64&gt; %33
}
loadext_i8:
movsbl (%rdi), %ecx
movsbl 1(%rdi), %eax
movsbl 2(%rdi), %edx
vmovd %ecx, %xmm0
movsbl 3(%rdi), %ecx
vpinsrw $1, %eax, %xmm0, %xmm0
vpinsrw $2, %edx, %xmm0, %xmm0
movsbl 4(%rdi), %edx
vpinsrw $3, %ecx, %xmm0, %xmm0
movsbl 5(%rdi), %ecx
vpinsrw $4, %edx, %xmm0, %xmm0
movsbl 6(%rdi), %edx
vpinsrw $5, %ecx, %xmm0, %xmm0
movsbl 7(%rdi), %ecx
vpinsrw $6, %edx, %xmm0, %xmm0
vpinsrw $7, %ecx, %xmm0, %xmm0
retq
loadext_u8:
movzbl (%rdi), %ecx
movzbl 1(%rdi), %eax
movzbl 2(%rdi), %edx
vmovd %ecx, %xmm0
movzbl 3(%rdi), %ecx
vpinsrw $1, %eax, %xmm0, %xmm0
vpinsrw $2, %edx, %xmm0, %xmm0
movzbl 4(%rdi), %edx
vpinsrw $3, %ecx, %xmm0, %xmm0
movzbl 5(%rdi), %ecx
vpinsrw $4, %edx, %xmm0, %xmm0
movzbl 6(%rdi), %edx
vpinsrw $5, %ecx, %xmm0, %xmm0
movzbl 7(%rdi), %ecx
vpinsrw $6, %edx, %xmm0, %xmm0
vpinsrw $7, %ecx, %xmm0, %xmm0
retq</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>