[llvm-bugs] [Bug 37731] New: llvm.experimental.vector.reduce.xor and a extractelement+xors produce different code
via llvm-bugs
llvm-bugs at lists.llvm.org
Thu Jun 7 01:06:30 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=37731
Bug ID: 37731
Summary: llvm.experimental.vector.reduce.xor and a
extractelement+xors produce different code
Product: new-bugs
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: gonzalobg88 at gmail.com
CC: llvm-bugs at lists.llvm.org
The following Rust code is a SIMD implementation of the LFSR113 PRNG (spelled
`Lsfr113` in the code below; godbolt: https://godbolt.org/g/yDPyds):
```rust
#![feature(stdsimd)]
use std::simd::*;
/// Generator state: four `u32` lanes held in a single `u32x4` SIMD vector.
pub struct Lsfr113 {
// The four lanes of state, stored as one vector.
u: u32x4,
}
// Two formulations of the same step. Both apply an identical lane-wise update
// to `self.u`; they differ only in how the four lanes of the new state are
// combined into the returned `u32`.
impl Lsfr113 {
/// Advances the state and returns the XOR of all four lanes, combined with
/// the vector's own `xor()` reduction.
pub fn next_u32(&mut self) -> u32 {
// Per-lane shift counts; lane i of each constant applies to lane i of `self.u`.
const SHL: u32x4 = u32x4::new(6, 2, 13, 3);
const SHR: u32x4 = u32x4::new(13, 27, 21, 12);
// Per-lane masks: 0xFFFF_FFFE, 0xFFFF_FFF8, 0xFFFF_FFF0, 0xFFFF_FF80
// (they clear the low 1, 3, 4 and 7 bits respectively).
const AND: u32x4 = u32x4::new(4294967294, 4294967288, 4294967280,
4294967168);
const SHL2: u32x4 = u32x4::new(18, 2, 7, 13);
// Feedback term, computed from the old state before it is overwritten.
let b = ((self.u << SHL) ^ self.u) >> SHR;
// New state: masked and shifted old state XORed with the feedback term.
self.u = ((self.u & AND) << SHL2) ^ b;
// Fold the four lanes of the new state into one value with XOR.
self.u.xor()
}
/// Advances the state exactly as `next_u32` does, but combines the four
/// lanes by extracting each one and XORing the scalars explicitly.
pub fn next2_u32(&mut self) -> u32 {
// Same constants and update as in `next_u32`.
const SHL: u32x4 = u32x4::new(6, 2, 13, 3);
const SHR: u32x4 = u32x4::new(13, 27, 21, 12);
// Per-lane masks: 0xFFFF_FFFE, 0xFFFF_FFF8, 0xFFFF_FFF0, 0xFFFF_FF80.
const AND: u32x4 = u32x4::new(4294967294, 4294967288, 4294967280,
4294967168);
const SHL2: u32x4 = u32x4::new(18, 2, 7, 13);
let b = ((self.u << SHL) ^ self.u) >> SHR;
self.u = ((self.u & AND) << SHL2) ^ b;
// Hand-written XOR of lanes 0 through 3 of the new state.
self.u.extract(0)
^ self.u.extract(1)
^ self.u.extract(2)
^ self.u.extract(3)
}
}
```
Compiling with `-C opt-level=3 -C target-cpu=native -C panic=abort -C debuginfo=0
--emit=llvm-ir` emits the following LLVM IR (godbolt:
https://godbolt.org/g/g8ZVWn):
```llvm-ir
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @example::Lsfr113::next_u32(<4 x i32>* noalias nocapture
dereferenceable(16) %self) unnamed_addr #0 {
%0 = load <4 x i32>, <4 x i32>* %self, align 16
%1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
%2 = xor <4 x i32> %1, %0
%3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
%4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
%5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
%6 = xor <4 x i32> %3, %5
store <4 x i32> %6, <4 x i32>* %self, align 16
%7 = tail call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
%6) #2
ret i32 %7
}
define i32 @example::Lsfr113::next2_u32(<4 x i32>* noalias nocapture
dereferenceable(16) %self) unnamed_addr #0 {
%0 = load <4 x i32>, <4 x i32>* %self, align 16
%1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
%2 = xor <4 x i32> %1, %0
%3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
%4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
%5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
%6 = xor <4 x i32> %3, %5
store <4 x i32> %6, <4 x i32>* %self, align 16
%7 = extractelement <4 x i32> %6, i32 0
%8 = extractelement <4 x i32> %6, i32 1
%9 = xor i32 %7, %8
%10 = extractelement <4 x i32> %6, i32 2
%11 = xor i32 %9, %10
%12 = extractelement <4 x i32> %6, i32 3
%13 = xor i32 %11, %12
ret i32 %13
}
declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>) #1
attributes #0 = { nounwind "probe-stack"="__rust_probestack" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
```
which generates the following assembly:
```asm
.LCPI0_0:
.long 6
.long 2
.long 13
.long 3
.LCPI0_1:
.long 13
.long 27
.long 21
.long 12
.LCPI0_2:
.long 4294967294
.long 4294967288
.long 4294967280
.long 4294967168
.LCPI0_3:
.long 18
.long 2
.long 7
.long 13
example::Lsfr113::next_u32:
vmovdqa xmm0, xmmword ptr [rdi]
vpsllvd xmm1, xmm0, xmmword ptr [rip + .LCPI0_0]
vpxor xmm1, xmm1, xmm0
vpsrlvd xmm1, xmm1, xmmword ptr [rip + .LCPI0_1]
vpand xmm0, xmm0, xmmword ptr [rip + .LCPI0_2]
vpsllvd xmm0, xmm0, xmmword ptr [rip + .LCPI0_3]
vpxor xmm0, xmm1, xmm0
vmovdqa xmmword ptr [rdi], xmm0
vpshufd xmm1, xmm0, 78
vpxor xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 229
vpxor xmm0, xmm0, xmm1
vmovd eax, xmm0
ret
.LCPI1_0:
.long 6
.long 2
.long 13
.long 3
.LCPI1_1:
.long 13
.long 27
.long 21
.long 12
.LCPI1_2:
.long 4294967294
.long 4294967288
.long 4294967280
.long 4294967168
.LCPI1_3:
.long 18
.long 2
.long 7
.long 13
example::Lsfr113::next2_u32:
vmovdqa xmm0, xmmword ptr [rdi]
vpsllvd xmm1, xmm0, xmmword ptr [rip + .LCPI1_0]
vpxor xmm1, xmm1, xmm0
vpsrlvd xmm1, xmm1, xmmword ptr [rip + .LCPI1_1]
vpand xmm0, xmm0, xmmword ptr [rip + .LCPI1_2]
vpsllvd xmm0, xmm0, xmmword ptr [rip + .LCPI1_3]
vpxor xmm0, xmm1, xmm0
vmovdqa xmmword ptr [rdi], xmm0
vmovd eax, xmm0
vpextrd ecx, xmm0, 1
xor ecx, eax
vpextrd edx, xmm0, 2
vpextrd eax, xmm0, 3
xor eax, edx
xor eax, ecx
ret
```
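Both functions compute the same value (the XOR of the four lanes of the updated
state), so they should generate the exact same ASM, but they do not: `next_u32`
reduces inside the vector register with two `vpshufd` + `vpxor` steps followed by
a single `vmovd`, while `next2_u32` extracts each lane with `vmovd`/`vpextrd` and
combines them with scalar `xor` instructions.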
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180607/63d80928/attachment-0001.html>
More information about the llvm-bugs
mailing list