[LLVMbugs] [Bug 6455] New: Clear most significant 32 bits in the elements <2 x i64> efficiently
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Mon Mar 1 12:56:02 PST 2010
http://llvm.org/bugs/show_bug.cgi?id=6455
Summary: Clear most significant 32 bits in the elements <2 x
i64> efficiently
Product: libraries
Version: 2.6
Platform: PC
OS/Version: Linux
Status: NEW
Severity: enhancement
Priority: P5
Component: Backend: X86
AssignedTo: unassignedbugs at nondot.org
ReportedBy: llvm at henning-thielemann.de
CC: llvmbugs at cs.uiuc.edu
I wanted to write a multiplication of two <2 x i64> vectors that could be
implemented by pmuludq.
(See http://llvm.org/bugs/show_bug.cgi?id=6399)
I cleared the upper 32 bits of the vector elements of the factors before doing
the multiplication,
in order to show LLVM that pmuludq can be used instead of a full <2 x i64>
multiplication.
(I was not sure whether LLVM understands this hint.)
However, I found that clearing the upper 32 bits is sometimes compiled to quite
inefficient code, depending on how I write it.
The first two of the following implementations lead to inefficient code
and could be replaced automatically by the last one.
; Variant A: clear the upper 32 bits of each i64 element one scalar at a time.
; Each element is extracted, truncated to i32, zero-extended back to i64 and
; re-inserted into a fresh vector. The single argument is the unnamed value %0.
define <2 x i64> @_clearupper2xi64a(<2 x i64>) {
; Pull both 64-bit elements out of the input vector.
%x0 = extractelement <2 x i64> %0, i32 0
%x1 = extractelement <2 x i64> %0, i32 1
; trunc keeps only the low 32 bits of each element ...
%trunc0 = trunc i64 %x0 to i32
%trunc1 = trunc i64 %x1 to i32
; ... and zext widens them again with the upper 32 bits set to zero.
%ext0 = zext i32 %trunc0 to i64
%ext1 = zext i32 %trunc1 to i64
; Rebuild the result starting from undef; both elements are overwritten.
%v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
%v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1
ret <2 x i64> %v1
}
; Variant B: reinterpret the input as <4 x i32> and overwrite i32 elements 1
; and 3 with zero. On a little-endian target such as x86 (the backend this
; report is filed against) those elements are the upper halves of the two i64
; elements. The single argument is the unnamed value %0.
define <2 x i64> @_clearupper2xi64b(<2 x i64>) {
; Same 128 bits, viewed as four 32-bit elements.
%x32 = bitcast <2 x i64> %0 to <4 x i32>
; Zero the odd-numbered 32-bit elements (indices 1 and 3).
%r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
%r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
; View the result as two 64-bit elements again.
%r = bitcast <4 x i32> %r1 to <2 x i64>
ret <2 x i64> %r
}
; Variant C: clear the upper 32 bits of both i64 elements with a single vector
; AND against a constant mask. The single argument is the unnamed value %0.
define <2 x i64> @_clearupper2xi64c(<2 x i64>) {
; 4294967295 = 0xFFFFFFFF: keeps the low 32 bits of each element, zeroes the rest.
%r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
ret <2 x i64> %r
}
They are compiled to:
# _clearupper2xi64a -- code generated for the extract/trunc/zext/insert
# variant (32-bit x86, AT&T syntax).
# In:       %xmm0 = input vector, viewed below as four 32-bit lanes [a0,a1,a2,a3]
# Out:      %xmm0 = [a0, 0, a2, 0]
# Clobbers: %eax, %xmm1, %xmm2
# Note: the ".type ...,@function" operand and the label's trailing comment
# were mangled/wrapped by the mail archive and are restored here.
        .align  16
        .globl  _clearupper2xi64a
        .type   _clearupper2xi64a,@function
_clearupper2xi64a:                      # @_clearupper2xi64a
.Leh_func_begin7:
.LBB7_0:
        pushl   %ebp
.Llabel14:
        movl    %esp, %ebp
.Llabel15:
        andl    $-16, %esp              # align %esp to 16 bytes; no stack slot is accessed below
        xorps   %xmm1, %xmm1            # xmm1 = [0, 0, 0, 0]
        movss   %xmm0, %xmm1            # xmm1 = [a0, 0, 0, 0] (reg-reg movss only replaces lane 0)
        movhlps %xmm0, %xmm0            # low 64 bits of xmm0 = its high 64 bits -> lane 0 = a2
        movd    %xmm0, %eax             # eax = a2, round trip through a GPR
        movd    %eax, %xmm2             # xmm2 = [a2, 0, 0, 0]
        movaps  %xmm1, %xmm0            # xmm0 = [a0, 0, 0, 0]
        movlhps %xmm2, %xmm0            # high 64 bits of xmm0 = low 64 bits of xmm2 -> [a0, 0, a2, 0]
        movl    %ebp, %esp
        popl    %ebp
        ret
        .size   _clearupper2xi64a, .-_clearupper2xi64a
.Leh_func_end7:
# _clearupper2xi64b -- code generated for the bitcast/insertelement variant
# (32-bit x86, AT&T syntax).
# In:       %xmm0 = input vector, viewed below as four 32-bit lanes [a0,a1,a2,a3]
# Out:      %xmm0 = [a0, 0, a2, 0]
# Clobbers: %eax, %xmm1, %xmm2
# Note: the ".type ...,@function" operand and the label's trailing comment
# were mangled/wrapped by the mail archive and are restored here.
        .align  16
        .globl  _clearupper2xi64b
        .type   _clearupper2xi64b,@function
_clearupper2xi64b:                      # @_clearupper2xi64b
.Leh_func_begin8:
.LBB8_0:
        pushl   %ebp
.Llabel16:
        movl    %esp, %ebp
.Llabel17:
        andl    $-16, %esp              # align %esp to 16 bytes; no stack slot is accessed below
        movaps  %xmm0, %xmm1            # xmm1 = [a0, a1, a2, a3]
        pslldq  $8, %xmm1               # byte-shift left by 8: xmm1 = [0, 0, a0, a1]
        shufps  $226, %xmm0, %xmm1      # 226 = 0b11_10_00_10: xmm1 = [xmm1[2], xmm1[0], xmm0[2], xmm0[3]]
                                        #                           = [a0, 0, a2, a3]
        xorl    %eax, %eax
        movd    %eax, %xmm0             # xmm0 = [0, 0, 0, 0]
        movaps  %xmm1, %xmm2            # xmm2 = [a0, 0, a2, a3]
        movss   %xmm0, %xmm2            # xmm2 = [0, 0, a2, a3] (reg-reg movss only replaces lane 0)
        movaps  %xmm1, %xmm0            # xmm0 = [a0, 0, a2, a3]
        shufps  $36, %xmm2, %xmm0       # 36 = 0b00_10_01_00: xmm0 = [xmm0[0], xmm0[1], xmm2[2], xmm2[0]]
                                        #                         = [a0, 0, a2, 0]
        movl    %ebp, %esp
        popl    %ebp
        ret
        .size   _clearupper2xi64b, .-_clearupper2xi64b
.Leh_func_end8:
# _clearupper2xi64c -- code generated for the vector-AND variant
# (32-bit x86, AT&T syntax), together with its 16-byte constant-pool mask.
# In:       %xmm0 = input vector
# Out:      %xmm0 = input AND mask, i.e. the upper 32 bits of each 64-bit lane cleared
# Clobbers: nothing besides %xmm0 and the saved/restored %ebp/%esp
# Note: the "@progbits" / "@function" operands and the wrapped trailing
# comments were mangled by the mail archive and are restored here.
        .section        .rodata.cst16,"aM",@progbits,16
        .align  16
.LCPI9_0:                               # constant <4 x i32>
        .long   4294967295              # 0xFFFFFFFF  low half of element 0: kept
        .zero   4                       #             high half of element 0: cleared
        .long   4294967295              # 0xFFFFFFFF  low half of element 1: kept
        .zero   4                       #             high half of element 1: cleared
        .text
        .align  16
        .globl  _clearupper2xi64c
        .type   _clearupper2xi64c,@function
_clearupper2xi64c:                      # @_clearupper2xi64c
.Leh_func_begin9:
.LBB9_0:
        pushl   %ebp
.Llabel18:
        movl    %esp, %ebp
.Llabel19:
        andl    $-16, %esp              # align %esp to 16 bytes; no stack slot is accessed below
        pand    .LCPI9_0, %xmm0         # single AND with the 16-byte-aligned mask does all the work
        movl    %ebp, %esp
        popl    %ebp
        ret
        .size   _clearupper2xi64c, .-_clearupper2xi64c
.Leh_func_end9:
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs
mailing list