[llvm-bugs] [Bug 37731] New: llvm.experimental.vector.reduce.xor and a extractelement+xors produce different code

via llvm-bugs llvm-bugs at lists.llvm.org
Thu Jun 7 01:06:30 PDT 2018


https://bugs.llvm.org/show_bug.cgi?id=37731

            Bug ID: 37731
           Summary: llvm.experimental.vector.reduce.xor and a
                    extractelement+xors produce different code
           Product: new-bugs
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: gonzalobg88 at gmail.com
                CC: llvm-bugs at lists.llvm.org

The following Rust code is a SIMD implementation of the LFSR113 PRNG (godbolt:
https://godbolt.org/g/yDPyds):

```rust
#![feature(stdsimd)]
use std::simd::*;

/// Generator state for the SIMD PRNG reproducer: four `u32` lanes held in one
/// vector.
pub struct Lsfr113 {
    // The whole state; both `next_u32` and `next2_u32` read and overwrite it.
    u: u32x4,
}

impl Lsfr113 {
    /// Advances the state by one step and returns the XOR of all four lanes,
    /// computed with the vector's own `xor()` reduction.
    pub fn next_u32(&mut self) -> u32 {
        // Per-lane shift counts used by the update below.
        const SHL: u32x4 = u32x4::new(6, 2, 13, 3);
        const SHR: u32x4 = u32x4::new(13, 27, 21, 12);
        // Per-lane masks clearing the low 1, 3, 4 and 7 bits respectively.
        const AND: u32x4 = u32x4::new(4294967294, 4294967288, 4294967280,
4294967168);
        const SHL2: u32x4 = u32x4::new(18, 2, 7, 13);

        // `b` is derived from the old state; the new state combines the
        // masked-and-shifted old state with `b`.
        let b = ((self.u << SHL) ^ self.u) >> SHR;
        self.u = ((self.u & AND) << SHL2) ^ b;
        // Reduce the updated state to a single `u32`.
        self.u.xor()
    }
    /// Performs the same state update as `next_u32`, but produces the result
    /// by extracting each lane and XOR-ing the four scalars together instead
    /// of calling the vector's `xor()` reduction.
    pub fn next2_u32(&mut self) -> u32 {
        // Same constants as in `next_u32`.
        const SHL: u32x4 = u32x4::new(6, 2, 13, 3);
        const SHR: u32x4 = u32x4::new(13, 27, 21, 12);
        const AND: u32x4 = u32x4::new(4294967294, 4294967288, 4294967280,
4294967168);
        const SHL2: u32x4 = u32x4::new(18, 2, 7, 13);

        // Same update as in `next_u32`.
        let b = ((self.u << SHL) ^ self.u) >> SHR;
        self.u = ((self.u & AND) << SHL2) ^ b;
        // Manual lane-by-lane XOR of the updated state.
        self.u.extract(0) 
        ^ self.u.extract(1) 
        ^ self.u.extract(2) 
        ^ self.u.extract(3)
    }
}
```

Using `-C opt-level=3 -C target-cpu=native -C panic=abort -C debuginfo=0
--emit=llvm-ir` it emits the following LLVM-IR (godbolt:
https://godbolt.org/g/g8ZVWn)

```llvm-ir
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i32 @example::Lsfr113::next_u32(<4 x i32>* noalias nocapture
dereferenceable(16) %self) unnamed_addr #0 {
  %0 = load <4 x i32>, <4 x i32>* %self, align 16
  %1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
  %2 = xor <4 x i32> %1, %0
  %3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
  %4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
  %5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
  %6 = xor <4 x i32> %3, %5
  store <4 x i32> %6, <4 x i32>* %self, align 16
  %7 = tail call i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>
%6) #2
  ret i32 %7
}

define i32 @example::Lsfr113::next2_u32(<4 x i32>* noalias nocapture
dereferenceable(16) %self) unnamed_addr #0 {
  %0 = load <4 x i32>, <4 x i32>* %self, align 16
  %1 = shl <4 x i32> %0, <i32 6, i32 2, i32 13, i32 3>
  %2 = xor <4 x i32> %1, %0
  %3 = lshr <4 x i32> %2, <i32 13, i32 27, i32 21, i32 12>
  %4 = and <4 x i32> %0, <i32 -2, i32 -8, i32 -16, i32 -128>
  %5 = shl <4 x i32> %4, <i32 18, i32 2, i32 7, i32 13>
  %6 = xor <4 x i32> %3, %5
  store <4 x i32> %6, <4 x i32>* %self, align 16
  %7 = extractelement <4 x i32> %6, i32 0
  %8 = extractelement <4 x i32> %6, i32 1
  %9 = xor i32 %7, %8
  %10 = extractelement <4 x i32> %6, i32 2
  %11 = xor i32 %9, %10
  %12 = extractelement <4 x i32> %6, i32 3
  %13 = xor i32 %11, %12
  ret i32 %13
}

declare i32 @llvm.experimental.vector.reduce.xor.i32.v4i32(<4 x i32>) #1

attributes #0 = { nounwind "probe-stack"="__rust_probestack" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
```

which generates the following assembly: 

```asm
.LCPI0_0:
  .long 6
  .long 2
  .long 13
  .long 3
.LCPI0_1:
  .long 13
  .long 27
  .long 21
  .long 12
.LCPI0_2:
  .long 4294967294
  .long 4294967288
  .long 4294967280
  .long 4294967168
.LCPI0_3:
  .long 18
  .long 2
  .long 7
  .long 13
example::Lsfr113::next_u32:
  vmovdqa xmm0, xmmword ptr [rdi]
  vpsllvd xmm1, xmm0, xmmword ptr [rip + .LCPI0_0]
  vpxor xmm1, xmm1, xmm0
  vpsrlvd xmm1, xmm1, xmmword ptr [rip + .LCPI0_1]
  vpand xmm0, xmm0, xmmword ptr [rip + .LCPI0_2]
  vpsllvd xmm0, xmm0, xmmword ptr [rip + .LCPI0_3]
  vpxor xmm0, xmm1, xmm0
  vmovdqa xmmword ptr [rdi], xmm0
  vpshufd xmm1, xmm0, 78
  vpxor xmm0, xmm0, xmm1
  vpshufd xmm1, xmm0, 229
  vpxor xmm0, xmm0, xmm1
  vmovd eax, xmm0
  ret

.LCPI1_0:
  .long 6
  .long 2
  .long 13
  .long 3
.LCPI1_1:
  .long 13
  .long 27
  .long 21
  .long 12
.LCPI1_2:
  .long 4294967294
  .long 4294967288
  .long 4294967280
  .long 4294967168
.LCPI1_3:
  .long 18
  .long 2
  .long 7
  .long 13
example::Lsfr113::next2_u32:
  vmovdqa xmm0, xmmword ptr [rdi]
  vpsllvd xmm1, xmm0, xmmword ptr [rip + .LCPI1_0]
  vpxor xmm1, xmm1, xmm0
  vpsrlvd xmm1, xmm1, xmmword ptr [rip + .LCPI1_1]
  vpand xmm0, xmm0, xmmword ptr [rip + .LCPI1_2]
  vpsllvd xmm0, xmm0, xmmword ptr [rip + .LCPI1_3]
  vpxor xmm0, xmm1, xmm0
  vmovdqa xmmword ptr [rdi], xmm0
  vmovd eax, xmm0
  vpextrd ecx, xmm0, 1
  xor ecx, eax
  vpextrd edx, xmm0, 2
  vpextrd eax, xmm0, 3
  xor eax, edx
  xor eax, ecx
  ret
```

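Both functions compute the same value, so they should generate the exact same
ASM, but they do not: `next_u32` (the `reduce.xor` intrinsic) is lowered to two
`vpshufd`+`vpxor` pairs followed by a single `vmovd`, whereas `next2_u32` (the
`extractelement`+`xor` chain) moves every lane into a general-purpose register
(`vmovd`/`vpextrd`) and XORs them as scalars.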

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180607/63d80928/attachment-0001.html>


More information about the llvm-bugs mailing list