[LLVMbugs] [Bug 6455] New: Clear most significant 32 bits in the elements <2 x i64> efficiently

Mon Mar 1 12:56:02 PST 2010

http://llvm.org/bugs/show_bug.cgi?id=6455

           Summary: Clear most significant 32 bits in the elements <2 x
                    i64> efficiently
           Product: libraries
           Version: 2.6
          Platform: PC
        OS/Version: Linux
            Status: NEW
          Severity: enhancement
          Priority: P5
         Component: Backend: X86
        AssignedTo: unassignedbugs at nondot.org
        ReportedBy: llvm at henning-thielemann.de
                CC: llvmbugs at cs.uiuc.edu

I wanted to write a multiplication of two <2 x i64> vectors that could be
implemented by pmuludq (see http://llvm.org/bugs/show_bug.cgi?id=6399).
To show LLVM that pmuludq can be used instead of a full <2 x i64>
multiplication, I cleared the upper 32 bits of the vector elements of both
factors before doing the multiplication.
(I was not sure whether LLVM understands this hint.)
However, I found that clearing the upper 32 bits is sometimes compiled to quite
inefficient code, depending on how I write it.

The first two of the following implementations lead to inefficient code;
the compiler could automatically rewrite them into the last one.

; Variant (a): clear the upper 32 bits of each i64 element one element at a
; time. Each element is extracted, truncated to i32, zero-extended back to
; i64, and inserted into a fresh vector.
define <2 x i64> @_clearupper2xi64a(<2 x i64>) {
  ; Pull both 64-bit elements out of the (unnamed) argument %0.
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  ; trunc keeps only the low 32 bits of each element ...
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  ; ... and zext widens them again with the upper 32 bits set to zero.
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  ; Rebuild the result vector, starting from undef.
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0,   i64 %ext1, i32 1
  ret <2 x i64> %v1
}

; Variant (b): reinterpret the argument as <4 x i32> and overwrite lanes 1
; and 3 with zero. On a little-endian target such as the x86 backend this
; report is filed against, those lanes are the upper halves of the two i64
; elements.
define <2 x i64> @_clearupper2xi64b(<2 x i64>) {
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  ; Zero lane 1, then lane 3; lanes 0 and 2 are left untouched.
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0,  i32 zeroinitializer, i32 3
  ; Back to the original element type for the return value.
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

; Variant (c): clear the upper 32 bits of both elements with a single vector
; AND against a constant mask.
define <2 x i64> @_clearupper2xi64c(<2 x i64>) {
  ; 4294967295 = 0xFFFFFFFF, i.e. only the low 32 bits of each i64 are kept.
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}

They are compiled to:
    # Code emitted for @_clearupper2xi64a (the extractelement / trunc / zext /
    # insertelement form). x86-32, AT&T syntax (source operand first).
    # The operand is read from %xmm0 and the result is left in %xmm0;
    # %eax, %xmm1 and %xmm2 are used as scratch.
    .align    16
    .globl    _clearupper2xi64a
    .type    _clearupper2xi64a, @function
_clearupper2xi64a:                                          # @_clearupper2xi64a
.Leh_func_begin7:
.LBB7_0:
    pushl    %ebp
.Llabel14:
    movl    %esp, %ebp
.Llabel15:
    andl    $-16, %esp                  # realign stack to 16 bytes; no stack slot is accessed in this listing
    xorps    %xmm1, %xmm1               # xmm1 = 0
    movss    %xmm0, %xmm1               # xmm1 lane 0 = low 32 bits of element 0; other lanes stay 0
    movhlps    %xmm0, %xmm0             # bring element 1 (high 64 bits) down into the low 64 bits
    movd    %xmm0, %eax                 # eax = low 32 bits of element 1 ...
    movd    %eax, %xmm2                 # ... round-tripped through a GPR; movd zeroes the rest of xmm2
    movaps    %xmm1, %xmm0              # xmm0 = element 0 with upper 32 bits cleared
    movlhps    %xmm2, %xmm0             # high 64 bits of xmm0 = element 1 with upper 32 bits cleared
    movl    %ebp, %esp
    popl    %ebp
    ret
    .size    _clearupper2xi64a, .-_clearupper2xi64a
.Leh_func_end7:

    # Code emitted for @_clearupper2xi64b (the <4 x i32> insertelement form).
    # x86-32, AT&T syntax (source operand first). The operand is read from
    # %xmm0 and the result is left in %xmm0; %eax, %xmm1 and %xmm2 are scratch.
    # Lane notation below: input xmm0 = [a0, a1, a2, a3], lane 0 first.
    .align    16
    .globl    _clearupper2xi64b
    .type    _clearupper2xi64b, @function
_clearupper2xi64b:                                          # @_clearupper2xi64b
.Leh_func_begin8:
.LBB8_0:
    pushl    %ebp
.Llabel16:
    movl    %esp, %ebp
.Llabel17:
    andl    $-16, %esp                  # realign stack to 16 bytes; no stack slot is accessed in this listing
    movaps    %xmm0, %xmm1
    pslldq    $8, %xmm1                 # byte shift left by 8: xmm1 = [0, 0, a0, a1]
    shufps    $226, %xmm0, %xmm1        # 226 = 0xE2: xmm1 = [xmm1[2], xmm1[0], xmm0[2], xmm0[3]] = [a0, 0, a2, a3]
    xorl    %eax, %eax
    movd    %eax, %xmm0                 # xmm0 = 0 (zero materialised via a GPR)
    movaps    %xmm1, %xmm2
    movss    %xmm0, %xmm2               # replace lane 0 with 0: xmm2 = [0, 0, a2, a3]
    movaps    %xmm1, %xmm0              # xmm0 = [a0, 0, a2, a3]
    shufps    $36, %xmm2, %xmm0         # 36 = 0x24: xmm0 = [xmm0[0], xmm0[1], xmm2[2], xmm2[0]] = [a0, 0, a2, 0]
    movl    %ebp, %esp
    popl    %ebp
    ret
    .size    _clearupper2xi64b, .-_clearupper2xi64b
.Leh_func_end8:

    .section    .rodata.cst16,"aM",@progbits,16
    .align    16
    # 16-byte mask used by _clearupper2xi64c: two little-endian 64-bit values
    # of 0x00000000FFFFFFFF (a 0xFFFFFFFF dword followed by four zero bytes).
.LCPI9_0:                                                   # constant <4 x i32>
    .long    4294967295                                  # 0xFFFFFFFF
    .zero    4
    .long    4294967295                                  # 0xFFFFFFFF
    .zero    4
    .text
    # Code emitted for @_clearupper2xi64c (the vector `and` form).
    # x86-32, AT&T syntax. The operand is read from %xmm0 and the result is
    # left in %xmm0; the whole operation is a single pand with the mask above.
    .align    16
    .globl    _clearupper2xi64c
    .type    _clearupper2xi64c, @function
_clearupper2xi64c:                                          # @_clearupper2xi64c
.Leh_func_begin9:
.LBB9_0:
    pushl    %ebp
.Llabel18:
    movl    %esp, %ebp
.Llabel19:
    andl    $-16, %esp                  # realign stack to 16 bytes; no stack slot is accessed in this listing
    pand    .LCPI9_0, %xmm0             # xmm0 &= mask: clears the upper 32 bits of both 64-bit elements
    movl    %ebp, %esp
    popl    %ebp
    ret
    .size    _clearupper2xi64c, .-_clearupper2xi64c
.Leh_func_end9:

-- 
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.