<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - llvm.experimental.vector.reduce.xor and an extractelement+xors produce different code"
href="https://bugs.llvm.org/show_bug.cgi?id=37731">37731</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>llvm.experimental.vector.reduce.xor and an extractelement+xors produce different code
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>gonzalobg88@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>The following Rust code is a SIMD implementation of the LFSR113 PRNG (godbolt:
<a href="https://godbolt.org/g/yDPyds">https://godbolt.org/g/yDPyds</a>):
```rust
#![feature(stdsimd)]
use std::simd::*;
pub struct Lsfr113 {
u: u32x4,
}
impl Lsfr113 {
pub fn next_u32(&mut self) -> u32 {
const SHL: u32x4 = u32x4::new(6, 2, 13, 3);
const SHR: u32x4 = u32x4::new(13, 27, 21, 12);
const AND: u32x4 = u32x4::new(4294967294, 4294967288, 4294967280,
4294967168);
const SHL2: u32x4 = u32x4::new(18, 2, 7, 13);
let b = ((self.u << SHL) ^ self.u) >> SHR;
self.u = ((self.u & AND) << SHL2) ^ b;
self.u.xor()
}
pub fn next2_u32(&mut self) -> u32 {
const SHL: u32x4 = u32x4::new(6, 2, 13, 3);
const SHR: u32x4 = u32x4::new(13, 27, 21, 12);
const AND: u32x4 = u32x4::new(4294967294, 4294967288, 4294967280,
4294967168);
const SHL2: u32x4 = u32x4::new(18, 2, 7, 13);
let b = ((self.u << SHL) ^ self.u) >> SHR;
self.u = ((self.u & AND) << SHL2) ^ b;
self.u.extract(0)
^ self.u.extract(1)
^ self.u.extract(2)
^ self.u.extract(3)
}
}
```
Compiling with `-C opt-level=3 -C target-cpu=native -C panic=abort -C debuginfo=0
--emit=llvm-ir`, rustc emits the following LLVM IR (godbolt:
<a href="https://godbolt.org/g/g8ZVWn">https://godbolt.org/g/g8ZVWn</a>):
```llvm-ir
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i32 @example::Lsfr113::next_u32(<4 x i32>* noalias nocapture
dereferenceable(16) %self) unnamed_addr #0 {
%0 = load <4 x i32>, <4 x i32>* %self, align 16
%1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
%2 = xor <4 x i32> %1, %0
%3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
%4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
%5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
%6 = xor <4 x i32> %3, %5
store <4 x i32> %6, <4 x i32>* %self, align 16
%7 = tail call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
%6) #2
ret i32 %7
}
define i32 @example::Lsfr113::next2_u32(<4 x i32>* noalias nocapture
dereferenceable(16) %self) unnamed_addr #0 {
%0 = load <4 x i32>, <4 x i32>* %self, align 16
%1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
%2 = xor <4 x i32> %1, %0
%3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
%4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
%5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
%6 = xor <4 x i32> %3, %5
store <4 x i32> %6, <4 x i32>* %self, align 16
%7 = extractelement <4 x i32> %6, i32 0
%8 = extractelement <4 x i32> %6, i32 1
%9 = xor i32 %7, %8
%10 = extractelement <4 x i32> %6, i32 2
%11 = xor i32 %9, %10
%12 = extractelement <4 x i32> %6, i32 3
%13 = xor i32 %11, %12
ret i32 %13
}
declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>) #1
attributes #0 = { nounwind "probe-stack"="__rust_probestack" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
```
which generates the following assembly:
```asm
.LCPI0_0:
.long 6
.long 2
.long 13
.long 3
.LCPI0_1:
.long 13
.long 27
.long 21
.long 12
.LCPI0_2:
.long 4294967294
.long 4294967288
.long 4294967280
.long 4294967168
.LCPI0_3:
.long 18
.long 2
.long 7
.long 13
example::Lsfr113::next_u32:
vmovdqa xmm0, xmmword ptr [rdi]
vpsllvd xmm1, xmm0, xmmword ptr [rip + .LCPI0_0]
vpxor xmm1, xmm1, xmm0
vpsrlvd xmm1, xmm1, xmmword ptr [rip + .LCPI0_1]
vpand xmm0, xmm0, xmmword ptr [rip + .LCPI0_2]
vpsllvd xmm0, xmm0, xmmword ptr [rip + .LCPI0_3]
vpxor xmm0, xmm1, xmm0
vmovdqa xmmword ptr [rdi], xmm0
vpshufd xmm1, xmm0, 78
vpxor xmm0, xmm0, xmm1
vpshufd xmm1, xmm0, 229
vpxor xmm0, xmm0, xmm1
vmovd eax, xmm0
ret
.LCPI1_0:
.long 6
.long 2
.long 13
.long 3
.LCPI1_1:
.long 13
.long 27
.long 21
.long 12
.LCPI1_2:
.long 4294967294
.long 4294967288
.long 4294967280
.long 4294967168
.LCPI1_3:
.long 18
.long 2
.long 7
.long 13
example::Lsfr113::next2_u32:
vmovdqa xmm0, xmmword ptr [rdi]
vpsllvd xmm1, xmm0, xmmword ptr [rip + .LCPI1_0]
vpxor xmm1, xmm1, xmm0
vpsrlvd xmm1, xmm1, xmmword ptr [rip + .LCPI1_1]
vpand xmm0, xmm0, xmmword ptr [rip + .LCPI1_2]
vpsllvd xmm0, xmm0, xmmword ptr [rip + .LCPI1_3]
vpxor xmm0, xmm1, xmm0
vmovdqa xmmword ptr [rdi], xmm0
vmovd eax, xmm0
vpextrd ecx, xmm0, 1
xor ecx, eax
vpextrd edx, xmm0, 2
vpextrd eax, xmm0, 3
xor eax, edx
xor eax, ecx
ret
```
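Both functions compute the same XOR reduction and should generate the exact same
assembly, but they do not: the reduce intrinsic is lowered to a `vpshufd`/`vpxor`
shuffle sequence, while the extractelement version is lowered to `vpextrd`
instructions followed by scalar `xor`s.</pre>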
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>