[llvm-bugs] [Bug 52500] New: Generate inefficient code after canonicalize splat shuffle after cmp
via llvm-bugs
llvm-bugs at lists.llvm.org
Sat Nov 13 18:35:41 PST 2021
https://bugs.llvm.org/show_bug.cgi?id=52500
Bug ID: 52500
Summary: Generate inefficient code after canonicalize splat
shuffle after cmp
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: ken1979.luo at gmail.com
CC: llvm-bugs at lists.llvm.org
On X86 with AVX-512, the result of a vector compare instruction is placed in a %k
mask register, but there is no shuffle instruction for %k registers. Here is a test
case that regressed because the splat shuffle is now canonicalized to come after
the cmp. It can be reproduced with "llc -mcpu=skylake-avx512".
Before canonicalization, the code looks like this:
cat shufvXi32.ll
define <16 x i1> @shuffle(<16 x i1> %msk, i32 %in) {
entry:
%insrt = insertelement <16 x i32> undef, i32 %in, i32 0
%splat = shufflevector <16 x i32> %insrt, <16 x i32> poison, <16 x i32>
zeroinitializer
%mul = mul <16 x i32> <i32 789, i32 789, i32 789, i32 789, i32 789, i32 789,
i32 789, i32 789, i32 789, i32 789, i32 789, i32 789, i32 789, i32 789, i32
789, i32 789>, %splat
%cmp1 = icmp eq <16 x i32> %mul, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0,
i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%and = and <16 x i1> %msk, %cmp1
ret <16 x i1> %and
}
After canonicalization, the code looks like this:
opt -S < shufvXi32.ll -instcombine -o shufvXi1.ll
define <16 x i1> @shuffle(<16 x i1> %msk, i32 %in) {
entry:
%insrt = insertelement <16 x i32> undef, i32 %in, i32 0
%0 = mul <16 x i32> %insrt, <i32 789, i32 poison, i32 poison, i32 poison, i32
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison,
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
%1 = icmp eq <16 x i32> %0, zeroinitializer
%cmp1 = shufflevector <16 x i1> %1, <16 x i1> poison, <16 x i32>
zeroinitializer
%and = and <16 x i1> %cmp1, %msk
ret <16 x i1> %and
}
llc -mcpu=skylake-avx512 shufvXi32.ll
We get the following assembly:
# %bb.0: # %entry
vpsllw $7, %xmm0, %xmm0
vpmovb2m %xmm0, %k1
vpbroadcastd %edi, %zmm0
vpmulld .LCPI0_0(%rip){1to16}, %zmm0, %zmm0
vptestnmd %zmm0, %zmm0, %k0 {%k1}
vpmovm2b %k0, %xmm0
vzeroupper
retq
llc -mcpu=skylake-avx512 shufvXi1.ll
We get the following assembly:
# %bb.0: # %entry
vpsllw $7, %xmm0, %xmm0
vpxor %xmm1, %xmm1, %xmm1
vmovd %edi, %xmm2
movl $789, %eax # imm = 0x315
vmovd %eax, %xmm3
vpmulld %xmm3, %xmm2, %xmm2
vptestnmd %zmm2, %zmm2, %k0
vpmovm2w %k0, %ymm2
vpbroadcastw %xmm2, %ymm2
vpmovw2m %ymm2, %k1
vpcmpgtb %xmm0, %xmm1, %k0 {%k1}
vpmovm2b %k0, %xmm0
vzeroupper
retq
We can see that more instructions are generated for shufvXi1.ll.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20211114/1ba5d35f/attachment.html>
More information about the llvm-bugs
mailing list