[LLVMbugs] [Bug 12413] New: shuffle patterns not recognized with avx

bugzilla-daemon at llvm.org
Thu Mar 29 14:22:55 PDT 2012


http://llvm.org/bugs/show_bug.cgi?id=12413

             Bug #: 12413
           Summary: shuffle patterns not recognized with avx
           Product: libraries
           Version: trunk
          Platform: PC
        OS/Version: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
        AssignedTo: unassignedbugs at nondot.org
        ReportedBy: sroland at vmware.com
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified


Some shuffles generate good code without AVX, but produce very poor code when AVX is available.

define <32 x i8> @shuf(<32 x i8> %inval1, <32 x i8> %inval2) {
entry:
  %0 = shufflevector <32 x i8> %inval1, <32 x i8> %inval2,
       <32 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14,
                   i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30,
                   i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46,
                   i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62>
  ret <32 x i8> %0
}
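
For reference, the shuffle mask selects the even-indexed bytes of the
64-byte concatenation of the two inputs, i.e. the even bytes of %inval1
followed by the even bytes of %inval2. A scalar C equivalent, purely as
an illustration of the semantics (names are made up):

#include <stdint.h>
#include <string.h>

/* Gather the even-indexed bytes of the concatenation of the two
   32-byte inputs, as the shufflevector above does. */
static void shuf_ref(const uint8_t in1[32], const uint8_t in2[32],
                     uint8_t out[32]) {
    uint8_t concat[64];
    memcpy(concat, in1, 32);
    memcpy(concat + 32, in2, 32);
    for (int i = 0; i < 32; i++)
        out[i] = concat[2 * i];
}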

Without AVX this generates:
    movdqa    .LCPI0_0(%rip), %xmm4
    pshufb    %xmm4, %xmm1
    movdqa    .LCPI0_1(%rip), %xmm5
    pshufb    %xmm5, %xmm0
    por    %xmm1, %xmm0
    pshufb    %xmm4, %xmm3
    pshufb    %xmm5, %xmm2
    por    %xmm3, %xmm2
    movdqa    %xmm2, %xmm1
    ret
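
(The constant-pool masks are not shown above; given pshufb semantics,
where a mask byte with its high bit set yields a zero byte, they should
be something like

    .LCPI0_1: 0,2,4,6,8,10,12,14, then eight bytes of 0x80
              (even bytes of the low input half -> result bytes 0-7)
    .LCPI0_0: eight bytes of 0x80, then 0,2,4,6,8,10,12,14
              (even bytes of the high input half -> result bytes 8-15)

so each pshufb/pshufb/por triple compresses one 256-bit input into 16
bytes.)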

(As a side note, I'm not really sure how passing 256-bit values in and
out of functions without AVX is defined, but it seems to have worked ok;
each 256-bit value is apparently split across two xmm registers.)

But with -mattr=avx, code generation is unable to figure out that it
should use vextractf128 and then do the same as above; instead it uses
single-byte extracts/inserts and produces the hilarious code below (a
hand-written sketch of the expected sequence follows it):

    pushq    %rbp
.Ltmp3:
    .cfi_def_cfa_offset 16
.Ltmp4:
    .cfi_offset %rbp, -16
    movq    %rsp, %rbp
.Ltmp5:
    .cfi_def_cfa_register %rbp
    pushq    %r15
    pushq    %r14
    pushq    %rbx
.Ltmp6:
    .cfi_offset %rbx, -40
.Ltmp7:
    .cfi_offset %r14, -32
.Ltmp8:
    .cfi_offset %r15, -24
    vpextrb    $2, %xmm1, %eax
    vpextrb    $0, %xmm1, %ecx
    vmovd    %ecx, %xmm2
    vpinsrb    $1, %eax, %xmm2, %xmm2
    vpextrb    $4, %xmm1, %eax
    vpinsrb    $2, %eax, %xmm2, %xmm2
    vpextrb    $6, %xmm1, %eax
    vpextrb    $2, %xmm0, %ecx
    vpextrb    $0, %xmm0, %edx
    vmovd    %edx, %xmm3
    vpinsrb    $1, %ecx, %xmm3, %xmm4
    vpinsrb    $3, %eax, %xmm2, %xmm5
    vpextrb    $8, %xmm1, %edx
    vextractf128    $1, %ymm0, %xmm3
    vpextrb    $4, %xmm0, %esi
    vpextrb    $10, %xmm1, %eax
    vpextrb    $12, %xmm1, %ecx
    vextractf128    $1, %ymm1, %xmm2
    vpextrb    $14, %xmm3, %r8d
    vpextrb    $12, %xmm3, %r9d
    vpextrb    $10, %xmm3, %r10d
    vpextrb    $8, %xmm3, %r11d
    vpextrb    $6, %xmm3, %r14d
    vpinsrb    $4, %edx, %xmm5, %xmm5
    vpinsrb    $2, %esi, %xmm4, %xmm4
    vpextrb    $6, %xmm0, %edx
    vpinsrb    $3, %edx, %xmm4, %xmm4
    vpextrb    $8, %xmm0, %edx
    vpinsrb    $4, %edx, %xmm4, %xmm4
    vpextrb    $10, %xmm0, %edx
    vpinsrb    $5, %edx, %xmm4, %xmm4
    vpinsrb    $5, %eax, %xmm5, %xmm5
    vpextrb    $14, %xmm0, %ebx
    vpextrb    $12, %xmm0, %eax
    vpextrb    $0, %xmm2, %esi
    vpextrb    $14, %xmm1, %edx
    vpextrb    $0, %xmm3, %edi
    vpextrb    $4, %xmm3, %r15d
    vpinsrb    $6, %ecx, %xmm5, %xmm0
    vpextrb    $14, %xmm2, %ecx
    vpinsrb    $7, %edx, %xmm0, %xmm0
    vpextrb    $12, %xmm2, %edx
    vpinsrb    $8, %esi, %xmm0, %xmm1
    vpextrb    $10, %xmm2, %esi
    vpinsrb    $6, %eax, %xmm4, %xmm0
    vpextrb    $8, %xmm2, %eax
    vpinsrb    $7, %ebx, %xmm0, %xmm0
    vpextrb    $6, %xmm2, %ebx
    vpinsrb    $8, %edi, %xmm0, %xmm0
    vpextrb    $2, %xmm3, %edi
    vpinsrb    $9, %edi, %xmm0, %xmm0
    vpextrb    $2, %xmm2, %edi
    vpinsrb    $9, %edi, %xmm1, %xmm1
    vpextrb    $4, %xmm2, %edi
    vpinsrb    $10, %edi, %xmm1, %xmm1
    vpinsrb    $11, %ebx, %xmm1, %xmm1
    vpinsrb    $12, %eax, %xmm1, %xmm1
    vpinsrb    $13, %esi, %xmm1, %xmm1
    vpinsrb    $14, %edx, %xmm1, %xmm1
    vpinsrb    $15, %ecx, %xmm1, %xmm1
    vpinsrb    $10, %r15d, %xmm0, %xmm0
    vpinsrb    $11, %r14d, %xmm0, %xmm0
    vpinsrb    $12, %r11d, %xmm0, %xmm0
    vpinsrb    $13, %r10d, %xmm0, %xmm0
    vpinsrb    $14, %r9d, %xmm0, %xmm0
    vpinsrb    $15, %r8d, %xmm0, %xmm0
    vinsertf128    $1, %xmm1, %ymm0, %ymm0
    popq    %rbx
    popq    %r14
    popq    %r15
    popq    %rbp
    ret
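
For comparison, here is a hand-written sketch of the sequence one would
expect (untested; it mirrors the SSE version above and reuses the same
two constant-pool masks):

    vextractf128    $1, %ymm0, %xmm2
    vextractf128    $1, %ymm1, %xmm3
    vmovdqa    .LCPI0_0(%rip), %xmm4
    vmovdqa    .LCPI0_1(%rip), %xmm5
    vpshufb    %xmm4, %xmm2, %xmm2
    vpshufb    %xmm5, %xmm0, %xmm0
    vpor    %xmm2, %xmm0, %xmm0
    vpshufb    %xmm4, %xmm3, %xmm3
    vpshufb    %xmm5, %xmm1, %xmm1
    vpor    %xmm3, %xmm1, %xmm1
    vinsertf128    $1, %xmm1, %ymm0, %ymm0
    ret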
