[llvm-bugs] [Bug 52500] New: Generate inefficient code after canonicalize splat shuffle after cmp

via llvm-bugs llvm-bugs at lists.llvm.org
Sat Nov 13 18:35:41 PST 2021


https://bugs.llvm.org/show_bug.cgi?id=52500

            Bug ID: 52500
           Summary: Generate inefficient code after canonicalize splat
                    shuffle after cmp
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Scalar Optimizations
          Assignee: unassignedbugs at nondot.org
          Reporter: ken1979.luo at gmail.com
                CC: llvm-bugs at lists.llvm.org

On X86 with AVX-512, the result of a vector compare instruction is placed in a
%k register, but there is no shuffle instruction for %k registers. Here is a
test case that regressed because the splat shuffle is now canonicalized to come
after the cmp. It can be reproduced with "llc -mcpu=skylake-avx512".

Before canonicalization, the code looks like this:
cat shufvXi32.ll

define <16 x i1> @shuffle(<16 x i1> %msk, i32 %in) {
entry:
  %insrt = insertelement <16 x i32> undef, i32 %in, i32 0
  %splat = shufflevector <16 x i32> %insrt, <16 x i32> poison, <16 x i32>
zeroinitializer
  %mul = mul <16 x i32> <i32 789, i32 789, i32 789, i32 789, i32 789, i32 789,
i32 789, i32 789, i32 789, i32 789, i32 789, i32 789, i32 789, i32 789, i32
789, i32 789>, %splat
  %cmp1 = icmp eq <16 x i32> %mul, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %and = and <16 x i1> %msk, %cmp1
  ret <16 x i1> %and
}

After canonicalization, the code looks like this:
opt -S < shufvXi32.ll -instcombine -o shufvXi1.ll

define <16 x i1> @shuffle(<16 x i1> %msk, i32 %in) {
entry:
  %insrt = insertelement <16 x i32> undef, i32 %in, i32 0
  %0 = mul <16 x i32> %insrt, <i32 789, i32 poison, i32 poison, i32 poison, i32
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %1 = icmp eq <16 x i32> %0, zeroinitializer
  %cmp1 = shufflevector <16 x i1> %1, <16 x i1> poison, <16 x i32>
zeroinitializer
  %and = and <16 x i1> %cmp1, %msk
  ret <16 x i1> %and
}

llc -mcpu=skylake-avx512 shufvXi32.ll
We get the following assembly:

# %bb.0:                                # %entry
        vpsllw  $7, %xmm0, %xmm0
        vpmovb2m        %xmm0, %k1
        vpbroadcastd    %edi, %zmm0
        vpmulld .LCPI0_0(%rip){1to16}, %zmm0, %zmm0
        vptestnmd       %zmm0, %zmm0, %k0 {%k1}
        vpmovm2b        %k0, %xmm0
        vzeroupper
        retq
llc -mcpu=skylake-avx512 shufvXi1.ll
We get the following assembly:

# %bb.0:                                # %entry
        vpsllw  $7, %xmm0, %xmm0
        vpxor   %xmm1, %xmm1, %xmm1
        vmovd   %edi, %xmm2
        movl    $789, %eax                      # imm = 0x315
        vmovd   %eax, %xmm3
        vpmulld %xmm3, %xmm2, %xmm2
        vptestnmd       %zmm2, %zmm2, %k0
        vpmovm2w        %k0, %ymm2
        vpbroadcastw    %xmm2, %ymm2
        vpmovw2m        %ymm2, %k1
        vpcmpgtb        %xmm0, %xmm1, %k0 {%k1}
        vpmovm2b        %k0, %xmm0
        vzeroupper
        retq
We can see that more instructions are generated for shufvXi1.ll.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20211114/1ba5d35f/attachment.html>


More information about the llvm-bugs mailing list