[llvm-bugs] [Bug 44008] New: [SLPVectorizer] Failure to keep complex math vectorized

Fri Nov 15 05:26:17 PST 2019

https://bugs.llvm.org/show_bug.cgi?id=44008

            Bug ID: 44008
           Summary: [SLPVectorizer] Failure to keep complex math
                    vectorized
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: llvm-dev at redking.me.uk
                CC: a.bataev at hotmail.com, craig.topper at gmail.com,
                    dtemirbulatov at gmail.com, llvm-bugs at lists.llvm.org,
                    spatel+llvm at rotateright.com, v.porpodas at gmail.com

https://godbolt.org/z/YgBdFz

#include <complex>

std::complex<float> mul_f32(std::complex<float> &A) {
    return (2.0f * A);
}

std::complex<float> fma_f32(std::complex<float> &A, float B) {
    return (2.0f * A) + B;
}

For the simple mul case, we manage to keep the real+imag components vectorized
as a <2 x float>:

clang -g0 -O3 -march=btver2

mul_f32:
  vmovsd (%rdi), %xmm0 # xmm0 = mem[0],zero
  vaddps %xmm0, %xmm0, %xmm0
  retq

But with the fma case, where B is only added to the real component, we end up scalarizing:

fma_f32:
  vmovsd (%rdi), %xmm1 # xmm1 = mem[0],zero
  vmovshdup %xmm1, %xmm2 # xmm2 = xmm1[1,1,3,3]
  vaddss %xmm1, %xmm1, %xmm1
  vaddss %xmm2, %xmm2, %xmm2
  vaddss %xmm0, %xmm1, %xmm0
  vinsertps $16, %xmm2, %xmm0, %xmm0 # xmm0 = xmm0[0],xmm2[0],xmm0[2,3]
  retq

Ideally this would be:

fma_f32:
  vmovsd (%rdi), %xmm1 # xmm1 = mem[0],zero
  vaddps %xmm1, %xmm1, %xmm1
  vaddss %xmm0, %xmm1, %xmm1 # xmm1[0] += xmm0[0]
  vmovaps %xmm1, %xmm0
  retq

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191115/af79b635/attachment.html>