[llvm-bugs] [Bug 32449] New: Opportunity to combine shuffles of splats with multiple uses

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Mar 28 09:17:36 PDT 2017


https://bugs.llvm.org/show_bug.cgi?id=32449

            Bug ID: 32449
           Summary: Opportunity to combine shuffles of splats with
                    multiple uses
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: zvi.rackover at intel.com
                CC: llvm-bugs at lists.llvm.org

For the following three functions:

 define <2 x double> @foo2(<2 x double> %v, <2 x double> *%p) nounwind {
   %res = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1,
i32 1>
   %res1 = shufflevector<2 x double> %res, <2 x double> undef, <2 x i32> <i32
1, i32 undef>
   store <2 x double> %res, <2 x double>* %p
   ret <2 x double> %res1
 }

 define <4 x double> @foo4(<4 x double> %v, <4 x double> *%p) nounwind {
   %res = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 2,
i32 2, i32 2, i32 2>
   %res1 = shufflevector<4 x double> %res, <4 x double> undef, <4 x i32> <i32
2, i32 0, i32 undef, i32 undef>
   store <4 x double> %res, <4 x double>* %p
   ret <4 x double> %res1
 }

 define <8 x float> @foo8(<8 x float> %v, <8 x float> *%p) nounwind {
   %res = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 5,
i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
   %res1 = shufflevector<8 x float> %res, <8 x float> undef, <8 x i32> <i32 2,
i32 0, i32 undef, i32 undef, i32 5, i32 1, i32 3, i32 7>
   store <8 x float> %res, <8 x float>* %p
   ret <8 x float> %res1
 }


llc -mcpu=skylake generates respectively:

foo2:
        vpermilpd       $3, %xmm0, %xmm1 # xmm1 = xmm0[1,1]
        vpermilpd       $1, %xmm1, %xmm0 # xmm0 = xmm1[1,0]
        vmovapd %xmm1, (%rdi)
        retq

foo4:
        vpermpd $170, %ymm0, %ymm1      # ymm1 = ymm0[2,2,2,2]
        vpermpd $226, %ymm1, %ymm0      # ymm0 = ymm1[2,0,2,3]
        vmovapd %ymm1, (%rdi)
        retq

foo8:
        vmovshdup       %ymm0, %ymm0    # ymm0 = ymm0[1,1,3,3,5,5,7,7]
        vpermpd $170, %ymm0, %ymm1      # ymm1 = ymm0[2,2,2,2]
        vmovaps .LCPI2_0(%rip), %ymm0   # ymm0 = <2,0,u,u,5,1,3,7>
        vpermps %ymm1, %ymm0, %ymm0
        vmovapd %ymm1, (%rdi)
        retq


In foo2() it's easy to see that the second vpermilpd is redundant.
In foo4(), similarly, the second vpermpd is redundant.
Not sure why in foo8 the splat shuffle and the non-splat shuffle are reversed,
but it can also be improved.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170328/90aea9a0/attachment.html>


More information about the llvm-bugs mailing list