[LLVMbugs] [Bug 2109] New: Missed optimization for extract/insertelements equivalent to movhps

Wed Feb 27 19:52:03 PST 2008

http://llvm.org/bugs/show_bug.cgi?id=2109

           Summary: Missed optimization for extract/insertelements
                    equivalent to movhps
           Product: new-bugs
           Version: unspecified
          Platform: PC
        OS/Version: Linux
            Status: NEW
          Severity: normal
          Priority: P2
         Component: new bugs
        AssignedTo: unassignedbugs at nondot.org
        ReportedBy: sharparrow1 at yahoo.com
                CC: llvmbugs at cs.uiuc.edu


Take the following bitcode:

define <4 x float> @_mm_loadh_pi22(<4 x float> %__A, <2 x i32>* %__P) nounwind 
{
entry:
        load <2 x i32>* %__P
        bitcast <2 x i32> %0 to <2 x float>
        extractelement <2 x float> %1, i32 0
        extractelement <2 x float> %1, i32 1
        insertelement <4 x float> %__A, float %2, i32 2
        insertelement <4 x float> %4, float %3, i32 3
        ret <4 x float> %5
}

This currently codegens to the following:
_mm_loadh_pi22:
        subl    $8, %esp
        movl    12(%esp), %eax
        movl    4(%eax), %ecx
        movl    %ecx, 4(%esp)
        movl    (%eax), %eax
        movl    %eax, (%esp)
        movss   (%esp), %xmm1
        movaps  %xmm0, %xmm2
        shufps  $3, %xmm1, %xmm2
        shufps  $36, %xmm2, %xmm0
        movss   4(%esp), %xmm1
        movaps  %xmm0, %xmm2
        shufps  $2, %xmm1, %xmm2
        shufps  $132, %xmm2, %xmm0
        addl    $8, %esp
        ret

Unless I've made a mistake, this should codegen to the following:
_mm_loadh_pi22:
        movl    4(%esp), %eax
        movhps  (%eax), %xmm0
        ret

Note that LLVM does know how to generate movhps... it does in fact generate it
for the following, which is functionally equivalent:
define <4 x float> @_mm_loadh_pi22(<4 x float> %__A, <1 x i64>* %__P) nounwind 
{
entry:
        %tmp4 = bitcast <1 x i64>* %__P to double*
        %tmp5 = load double* %tmp4
        %tmp6 = insertelement <2 x double> undef, double %tmp5, i32 0
        %tmp8 = bitcast <2 x double> %tmp6 to <4 x float>
        %tmp9 = shufflevector <4 x float> %__A, <4 x float> %tmp8, <4 x i32> <
i32 0, i32 1, i32 4, i32 5 >
        ret <4 x float> %tmp9
}


-- 
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.