[llvm-bugs] [Bug 37890] New: Equivalent vector shuffles emit widely different x86 avx2 code

via llvm-bugs llvm-bugs at lists.llvm.org
Thu Jun 21 01:49:53 PDT 2018


https://bugs.llvm.org/show_bug.cgi?id=37890

            Bug ID: 37890
           Summary: Equivalent vector shuffles emit widely different x86
                    avx2 code
           Product: new-bugs
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: gonzalobg88 at gmail.com
                CC: chandlerc at gmail.com, hfinkel at anl.gov,
                    llvm-bugs at lists.llvm.org, llvm-dev at redking.me.uk,
                    spatel+llvm at rotateright.com

The following LLVM-IR snippets perform something equivalent to this (in Rust
pseudo-code):

let x: i32x4; // e.g. i32x4(0, 1, 2, 3)
let y = shuffle!(x, [2, 3, undef, undef]); // i32x4(2, 3, undef, undef)
let a = x + y;  // i32x4(2, 4, undef, undef)
let b = shuffle!(a, [1, undef, undef, undef]);  // i32x4(4, undef, undef, undef)
let c = a + b; // i32x4(6, undef, undef, undef);
let result: i32 = c.extract(0);

The following 3 snippets of LLVM IR all produce different assembly when
compiled with "llc -O3 -mattr=avx2":

# Snippet 1 - reduction into smaller vectors that avoids undef

define i32 @add_and_reduce(<4 x i32> %a) {
  %b =  shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
  %as = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
  %sum = add <2 x i32> %as, %b
  %c = extractelement <2 x i32> %sum, i32 0
  %d = extractelement <2 x i32> %sum, i32 1
  %r = add i32 %c, %d
  ret i32 %r
}

emits

  vpshufd xmm1, xmm0, 250 # xmm1 = xmm0[2,2,3,3]
  vpmovzxdq xmm0, xmm0 # xmm0 = xmm0[0],zero,xmm0[1],zero
  vpaddq xmm0, xmm0, xmm1
  vmovd ecx, xmm0
  vpextrd eax, xmm0, 2
  add eax, ecx
  ret

# Snippet 2 - reduction using 0 instead of undef

define i32 @add_and_reduce(<4 x i32> %a) {
  %b =  shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 3, i32
0, i32 0>
  %sum = add <4 x i32> %a, %b
  %c = shufflevector <4 x i32> %sum, <4 x i32> %sum, <4 x i32> <i32 1, i32 0,
i32 0, i32 0>
  %d = add <4 x i32> %sum, %c
  %r = extractelement <4 x i32> %d, i32 0
  ret i32 %r
}

emits

  vpshufd xmm1, xmm0, 14 # xmm1 = xmm0[2,3,0,0]
  vpaddd xmm0, xmm0, xmm1
  vpshufd xmm1, xmm0, 1 # xmm1 = xmm0[1,0,0,0]
  vpaddd xmm0, xmm0, xmm1
  vmovd eax, xmm0
  ret

# Snippet 3 - reduction using undef

define i32 @add_and_reduce(<4 x i32> %a) {
  %b =  shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 2, i32 3, i32
undef, i32 undef>
  %sum = add <4 x i32> %a, %b
  %c = shufflevector <4 x i32> %sum, <4 x i32> %sum, <4 x i32> <i32 1, i32
undef, i32 undef, i32 undef>
  %d = add <4 x i32> %sum, %c
  %r = extractelement <4 x i32> %d, i32 0
  ret i32 %r
}

emits

  vpshufd xmm1, xmm0, 78 # xmm1 = xmm0[2,3,0,1]
  vpaddd xmm0, xmm0, xmm1
  vphaddd xmm0, xmm0, xmm0
  vmovd eax, xmm0
  ret

---

I've commented about this on the RFC for Rust's portable packed vector
extensions [0]; whether this is a bug or intended behavior might influence
how we end up supporting portable shuffles (e.g. do we need to support passing
`undef` as a shufflevector index?).

[0] https://github.com/rust-lang/rfcs/pull/2366#issuecomment-399027304

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180621/664b7620/attachment.html>


More information about the llvm-bugs mailing list