[LLVMbugs] [Bug 2584] New: Suboptimal codegen for <2 x i32> extractelement
bugzilla-daemon at cs.uiuc.edu
bugzilla-daemon at cs.uiuc.edu
Tue Jul 22 15:19:24 PDT 2008
http://llvm.org/bugs/show_bug.cgi?id=2584
Summary: Suboptimal codegen for <2 x i32> extractelement
Product: new-bugs
Version: unspecified
Platform: PC
OS/Version: All
Status: NEW
Severity: enhancement
Priority: P2
Component: new bugs
AssignedTo: unassignedbugs at nondot.org
ReportedBy: nicolas at capens.net
CC: llvmbugs at cs.uiuc.edu
The following LLVM IR extracts both the lower and upper dword from a 64-bit
register (MMX):
external constant <2 x i32> ; <<2 x i32>*>:0 [#uses=1]
external constant i32 ; <i32*>:1 [#uses=1]
external constant i32 ; <i32*>:2 [#uses=1]
define internal void @""() {
load <2 x i32>* @0, align 8 ; <<2 x i32>>:1 [#uses=2]
extractelement <2 x i32> %1, i32 0 ; <i32>:2 [#uses=1]
store i32 %2, i32* @1, align 4
extractelement <2 x i32> %1, i32 1 ; <i32>:3 [#uses=1]
store i32 %3, i32* @2, align 4
ret void
}
It generates the following code:
sub esp,14h
movq mm0,mmword ptr ds:[47E8700h]
movq mmword ptr [esp+8],mm0
mov eax,dword ptr [esp+8]
mov dword ptr ds:[47E86FCh],eax
movq mmword ptr [esp],mm0
mov eax,dword ptr [esp+4]
mov dword ptr ds:[47E86F8h],eax
add esp,14h
ret
This could be more efficient, for example:
movq mm0,mmword ptr ds:[47E8700h]
movd eax,mm0
mov dword ptr ds:[47E86FCh],eax
punpckhdq mm0,mm0
movd eax,mm0
mov dword ptr ds:[47E86F8h],eax
ret
That's significantly faster because the vector stays in registers instead of
being spilled to the stack and reloaded (and the first extract becomes just a
single movd).
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs
mailing list