[llvm-bugs] [Bug 44478] New: [X86] Illegal vector length reductions cause partial scalarization

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Jan 7 05:29:06 PST 2020


https://bugs.llvm.org/show_bug.cgi?id=44478

            Bug ID: 44478
           Summary: [X86] Illegal vector length reductions cause partial
                    scalarization
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: llvm-dev at redking.me.uk
                CC: andrea.dibiagio at gmail.com, craig.topper at gmail.com,
                    llvm-bugs at lists.llvm.org, llvm-dev at redking.me.uk,
                    spatel+llvm at rotateright.com

https://gcc.godbolt.org/z/zbkZUp

clang -g0 -O3 -march=btver2

float sum4(int *x) {
    return x[0] + x[1] + x[2] + x[3];
}

float sum5(int *x) {
    return x[0] + x[1] + x[2] + x[3] + x[4];
}

_Z4sum4Pi: # @_Z4sum4Pi
  vmovdqu (%rdi), %xmm0
  vphaddd %xmm0, %xmm0, %xmm0
  vphaddd %xmm0, %xmm0, %xmm0
  vcvtdq2ps %xmm0, %xmm0
  retq

_Z4sum5Pi: # @_Z4sum5Pi
  vmovdqu (%rdi), %xmm0
  vphaddd %xmm0, %xmm0, %xmm0
  vphaddd %xmm0, %xmm0, %xmm0
  vmovd %xmm0, %eax
  addl 16(%rdi), %eax
  vcvtsi2ss %eax, %xmm1, %xmm0
  retq

It'd probably be better to keep sum5 etc. entirely on the vector unit and avoid
fpu<->gpr traffic:

_Z4sum5Pi: # @_Z4sum5Pi
  vmovdqu (%rdi), %xmm0
  vphaddd %xmm0, %xmm0, %xmm0
  vphaddd %xmm0, %xmm0, %xmm0
  vmovd 16(%rdi), %xmm1
  vpaddd %xmm1, %xmm0, %xmm0
  vcvtdq2ps %xmm0, %xmm0
  retq

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200107/e24b8a00/attachment.html>


More information about the llvm-bugs mailing list