[llvm-bugs] [Bug 30780] New: [X86][SSE] Merging of zext into splat shuffle results in constant mask generation
via llvm-bugs
llvm-bugs at lists.llvm.org
Tue Oct 25 04:10:52 PDT 2016
https://llvm.org/bugs/show_bug.cgi?id=30780
Bug ID: 30780
Summary: [X86][SSE] Merging of zext into splat shuffle results
in constant mask generation
Product: libraries
Version: trunk
Hardware: PC
OS: Windows NT
Status: NEW
Severity: normal
Priority: P
Component: Backend: X86
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: llvm-bugs at lists.llvm.org, spatel+llvm at rotateright.com
Classification: Unclassified
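For the sext+splat case we use movsbl and splat the sign-extended i32. But for
the zext+splat case we merge the zext into the splat shuffle: the unextended
register is splatted first and the result is then ANDed with a mask loaded from
the constant pool, requiring a 16-byte constant mask. If we splat to a 256-bit
vector we end up with a 32-byte constant mask!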
; llc -mtriple=x86_64-unknown -mcpu=btver2
define <4 x i32> @splat_i8(i8 %in) {
%zext = sext i8 %in to i32
%insert = insertelement <4 x i32> undef, i32 %zext, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32>
zeroinitializer
ret <4 x i32> %splat
}
splat_i8:
movsbl %dil, %eax
vmovd %eax, %xmm0
vpshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
retq
define <4 x i32> @splat_u8(i8 %in) {
%zext = zext i8 %in to i32
%insert = insertelement <4 x i32> undef, i32 %zext, i32 0
%splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32>
zeroinitializer
ret <4 x i32> %splat
}
splat_u8:
vmovd %edi, %xmm0
vpshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
vpand .LCPI1_0(%rip), %xmm0, %xmm0
retq
define <8 x i32> @splat_u8_256(i8 %in) {
%zext = zext i8 %in to i32
%insert = insertelement <8 x i32> undef, i32 %zext, i32 0
%splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32>
zeroinitializer
ret <8 x i32> %splat
}
splat_u8_256:
vmovd %edi, %xmm0
vpshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
vinsertf128 $1, %xmm0, %ymm0, %ymm0
vandps .LCPI2_0(%rip), %ymm0, %ymm0
retq
Interestingly, on AVX2 machines this doesn't occur; the zext is performed as a
scalar movzbl before the broadcast, so no constant mask is needed:
; llc -mtriple=x86_64-unknown -mcpu=bdver4
splat_u8:
movzbl %dil, %eax
vmovd %eax, %xmm0
vbroadcastss %xmm0, %xmm0
retq
splat_u8_256:
movzbl %dil, %eax
vmovd %eax, %xmm0
vbroadcastss %xmm0, %ymm0
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20161025/68a61972/attachment.html>
More information about the llvm-bugs
mailing list