[llvm-bugs] [Bug 37890] New: Equivalent vector shuffles emit widely different x86 avx2 code
via llvm-bugs
llvm-bugs at lists.llvm.org
Thu Jun 21 01:49:53 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=37890
Bug ID: 37890
Summary: Equivalent vector shuffles emit widely different x86
avx2 code
Product: new-bugs
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: gonzalobg88 at gmail.com
CC: chandlerc at gmail.com, hfinkel at anl.gov,
llvm-bugs at lists.llvm.org, llvm-dev at redking.me.uk,
spatel+llvm at rotateright.com
The following LLVM-IR snippets perform something equivalent to this (in Rust
pseudo-code):
let x: i32x4;
let y = shuffle!(x, [2, 3, undef, undef]); // i32x4(2, 3, undef, undef)
let a = x + y; // i32x4(2, 4, undef, undef)
let b = shuffle!(a, [1, undef, undef, undef]); // i32x4(4, undef, undef,
undef)
let c = a + b; // i32x4(6, undef, undef, undef);
let result: i32 = c.extract(0);
The following 3 snippets of LLVM IR all emit different assembly when compiled
with "llc -O3 -mattr=avx2"
# Snippet 1 - reduction into smaller vectors that avoids undef
define i32 @add_and_reduce(<4 x i32> %a) {
%b = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
%as = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
%sum = add <2 x i32> %as, %b
%c = extractelement <2 x i32> %sum, i32 0
%d = extractelement <2 x i32> %sum, i32 1
%r = add i32 %c, %d
ret i32 %r
}
emits
vpshufd xmm1, xmm0, 250 # xmm1 = xmm0[2,2,3,3]
vpmovzxdq xmm0, xmm0 # xmm0 = xmm0[0],zero,xmm0[1],zero
vpaddq xmm0, xmm0, xmm1
vmovd ecx, xmm0
vpextrd eax, xmm0, 2
add eax, ecx
ret
# Snippet 2 - reduction using 0 instead of undef
define i32 @add_and_reduce(<4 x i32> %a) {
%b = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 3, i32
0, i32 0>
%sum = add <4 x i32> %a, %b
%c = shufflevector <4 x i32> %sum, <4 x i32> %sum, <4 x i32> <i32 1, i32 0,
i32 0, i32 0>
%d = add <4 x i32> %sum, %c
%r = extractelement <4 x i32> %d, i32 0
ret i32 %r
}
emits
vpshufd xmm1, xmm0, 14 # xmm1 = xmm0[2,3,0,0]
vpaddd xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 1 # xmm1 = xmm0[1,0,0,0]
vpaddd xmm0, xmm0, xmm1
vmovd eax, xmm0
ret
# Snippet 3 - reduction using undef
define i32 @add_and_reduce(<4 x i32> %a) {
%b = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 3, i32
undef, i32 undef>
%sum = add <4 x i32> %a, %b
%c = shufflevector <4 x i32> %sum, <4 x i32> %sum, <4 x i32> <i32 1, i32
undef, i32 undef, i32 undef>
%d = add <4 x i32> %sum, %c
%r = extractelement <4 x i32> %d, i32 0
ret i32 %r
}
emits
vpshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]
vpaddd xmm0, xmm0, xmm1
vphaddd xmm0, xmm0, xmm0
vmovd eax, xmm0
ret
---
I've commented about this on the RFC for Rust's portable packed vector
extensions [0], and whether this is a bug or intended behavior might influence
how we end up supporting portable shuffles (e.g. do we need to support passing
`undef` as a shufflevector index?).
[0] https://github.com/rust-lang/rfcs/pull/2366#issuecomment-399027304
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180621/664b7620/attachment.html>
More information about the llvm-bugs
mailing list