[llvm-bugs] [Bug 30780] New: [X86][SSE] Merging of zext into splat shuffle results in constant mask generation

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Oct 25 04:10:52 PDT 2016


https://llvm.org/bugs/show_bug.cgi?id=30780

            Bug ID: 30780
           Summary: [X86][SSE] Merging of zext into splat shuffle results
                    in constant mask generation
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: llvm-dev at redking.me.uk
                CC: llvm-bugs at lists.llvm.org, spatel+llvm at rotateright.com
    Classification: Unclassified

For the sext+splat case we use movsbl and splat the sign-extended i32. But for
the zext+splat case we merge the zext into the splat shuffle, which requires a
16-byte constant mask. If we splat to a 256-bit vector we end up with a 32-byte
constant mask!

; llc -mtriple=x86_64-unknown -mcpu=btver2

define <4 x i32> @splat_i8(i8 %in) {
  %zext = sext i8 %in to i32
  %insert = insertelement <4 x i32> undef, i32 %zext, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32>
zeroinitializer
  ret <4 x i32> %splat
}
splat_i8:
  movsbl    %dil, %eax
  vmovd    %eax, %xmm0
  vpshufd    $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
  retq

define <4 x i32> @splat_u8(i8 %in) {
  %zext = zext i8 %in to i32
  %insert = insertelement <4 x i32> undef, i32 %zext, i32 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32>
zeroinitializer
  ret <4 x i32> %splat
}

splat_u8:
  vmovd    %edi, %xmm0
  vpshufd    $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
  vpand    .LCPI1_0(%rip), %xmm0, %xmm0
  retq

define <8 x i32> @splat_u8_256(i8 %in) {
  %zext = zext i8 %in to i32
  %insert = insertelement <8 x i32> undef, i32 %zext, i32 0
  %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32>
zeroinitializer
  ret <8 x i32> %splat
}

splat_u8_256:
  vmovd    %edi, %xmm0
  vpshufd    $0, %xmm0, %xmm0        # xmm0 = xmm0[0,0,0,0]
  vinsertf128    $1, %xmm0, %ymm0, %ymm0
  vandps    .LCPI2_0(%rip), %ymm0, %ymm0
  retq

Interestingly, on AVX2 machines this doesn't occur:

; llc -mtriple=x86_64-unknown -mcpu=bdver4

splat_u8:
  movzbl    %dil, %eax
  vmovd    %eax, %xmm0
  vbroadcastss    %xmm0, %xmm0
  retq

splat_u8_256:
  movzbl    %dil, %eax
  vmovd    %eax, %xmm0
  vbroadcastss    %xmm0, %ymm0
  retq

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20161025/68a61972/attachment.html>


More information about the llvm-bugs mailing list