<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Equivalent vector shuffles emit widely different x86 avx2 code"
href="https://bugs.llvm.org/show_bug.cgi?id=37890">37890</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Equivalent vector shuffles emit widely different x86 avx2 code
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>gonzalobg88@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>chandlerc@gmail.com, hfinkel@anl.gov, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, spatel+llvm@rotateright.com
</td>
</tr></table>
<p>
<div>
<pre>The following LLVM-IR snippets perform something equivalent to this (in Rust
pseudo-code):
let x: i32x4;
let y = shuffle!(x, [2, 3, undef, undef]); // i32x4(2, 3, undef, undef)
let a = x + y; // i32x4(2, 4, undef, undef)
let b = shuffle!(a, [1, undef, undef, undef]); // i32x4(4, undef, undef,
undef)
let c = a + b; // i32x4(6, undef, undef, undef);
let result: i32 = c.extract(0);
The following 3 snippets of LLVM IR all emit different assembly when compiled
with "llc -O3 -mattr=avx2"
# Snippet 1 - reduction into smaller vectors that avoids undef
define i32 @add_and_reduce(<4 x i32> %a) {
%b = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
%as = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
%sum = add <2 x i32> %as, %b
%c = extractelement <2 x i32> %sum, i32 0
%d = extractelement <2 x i32> %sum, i32 1
%r = add i32 %c, %d
ret i32 %r
}
emits
vpshufd xmm1, xmm0, 250 # xmm1 = xmm0[2,2,3,3]
vpmovzxdq xmm0, xmm0 # xmm0 = xmm0[0],zero,xmm0[1],zero
vpaddq xmm0, xmm0, xmm1
vmovd ecx, xmm0
vpextrd eax, xmm0, 2
add eax, ecx
ret
# Snippet 2 - reduction using 0 instead of undef
define i32 @add_and_reduce(<4 x i32> %a) {
%b = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 3, i32
0, i32 0>
%sum = add <4 x i32> %a, %b
%c = shufflevector <4 x i32> %sum, <4 x i32> %sum, <4 x i32> <i32 1, i32 0,
i32 0, i32 0>
%d = add <4 x i32> %sum, %c
%r = extractelement <4 x i32> %d, i32 0
ret i32 %r
}
emits
vpshufd xmm1, xmm0, 14 # xmm1 = xmm0[2,3,0,0]
vpaddd xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 1 # xmm1 = xmm0[1,0,0,0]
vpaddd xmm0, xmm0, xmm1
vmovd eax, xmm0
ret
# Snippet 3 - reduction using undef
define i32 @add_and_reduce(<4 x i32> %a) {
%b = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 3, i32
undef, i32 undef>
%sum = add <4 x i32> %a, %b
%c = shufflevector <4 x i32> %sum, <4 x i32> %sum, <4 x i32> <i32 1, i32
undef, i32 undef, i32 undef>
%d = add <4 x i32> %sum, %c
%r = extractelement <4 x i32> %d, i32 0
ret i32 %r
}
emits
vpshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]
vpaddd xmm0, xmm0, xmm1
vphaddd xmm0, xmm0, xmm0
vmovd eax, xmm0
ret
---
I've commented about this on the RFC for Rust's portable packed vector
extensions [0], and whether this is a bug or intended behavior might influence
how we end up supporting portable shuffles (e.g. do we need to support passing
`undef` as a shufflevector index?).
[0] <a href="https://github.com/rust-lang/rfcs/pull/2366#issuecomment-399027304">https://github.com/rust-lang/rfcs/pull/2366#issuecomment-399027304</a></pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>