[llvm-bugs] [Bug 34369] New: [X86][AVX512] suboptimal shuffle sequence instead of one vpermw instruction

via llvm-bugs llvm-bugs at lists.llvm.org
Wed Aug 30 01:21:55 PDT 2017


https://bugs.llvm.org/show_bug.cgi?id=34369

            Bug ID: 34369
           Summary: [X86][AVX512] suboptimal shuffle sequence instead of
                    one vpermw instruction
           Product: libraries
           Version: trunk
          Hardware: All
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: ayman.musa at intel.com
                CC: llvm-bugs at lists.llvm.org

For the following IR:

define <16 x i16> @test(<16 x i16> %vec) {
   %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 9, i32 10, i32 12, i32 12>
   %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1>, <16 x i16> %shuf, <16 x i16> zeroinitializer
   ret <16 x i16> %res
}

>> llc -mcpu=skx <file-name> -o out.s


LLVM currently emits the following sequence (the IACA tool reports a throughput of 9.52):
     vextracti128    $1, %ymm0, %xmm1
     vpshufb .LCPI41_0(%rip), %xmm1, %xmm2 # xmm2 = xmm1[8,9,10,11,4,5,10,11,8,9,10,11,4,5,4,5]
     vpshufb .LCPI41_1(%rip), %xmm0, %xmm0 # xmm0 = xmm0[6,7,0,1,0,1,6,7,10,11,4,5,4,5,6,7]
     vpblendw    $136, %xmm2, %xmm0, %xmm0 # xmm0 = xmm0[0,1,2],xmm2[3],xmm0[4,5,6],xmm2[7]
     vpshufb .LCPI41_2(%rip), %xmm1, %xmm1 # xmm1 = xmm1[14,15,0,1,12,13,0,1,2,3,4,5,8,9,8,9]
     vinserti128 $1, %xmm1, %ymm0, %ymm0
     movw    $-1129, %ax             # imm = 0xFB97
     kmovd   %eax, %k1
     vmovdqu16   %ymm0, %ymm0 {%k1} {z}
     retq  

However, the whole sequence could be replaced with a single masked vpermw (the IACA tool reports a throughput of 5.76):
     vmovdqu .LCPI43_0(%rip), %ymm1 # ymm1 = [3,0,0,13,5,2,2,10,15,8,14,8,9,10,12,12]
     movw $-1129, %ax
     kmovd %eax, %k1
     vpermw %ymm0, %ymm1, %ymm0 {%k1} {z}

     retq

** The throughput figures above are as reported by the IACA tool; lower is better.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170830/88c0cf06/attachment.html>


More information about the llvm-bugs mailing list