[LLVMbugs] [Bug 9623] New: x86: inefficient code generated for i8 vector types
bugzilla-daemon at llvm.org
bugzilla-daemon at llvm.org
Mon Apr 4 05:22:40 PDT 2011
http://llvm.org/bugs/show_bug.cgi?id=9623
Summary: x86: inefficient code generated for i8 vector types
Product: new-bugs
Version: trunk
Platform: PC
OS/Version: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
AssignedTo: unassignedbugs at nondot.org
ReportedBy: matt at pharr.org
CC: llvmbugs at cs.uiuc.edu
Given this input:
define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y, <4 x i8> %__mask) nounwind
readnone alwaysinline {
entry:
%binop = mul <4 x i8> %x, %y
%binop6 = add <4 x i8> %binop, %x
ret <4 x i8> %binop6
}
The following quite lengthy code is generated by llc. It would be nice to get
the appropriate SSE instructions (e.g. widening to pmullw plus paddb) instead.
(This is probably not a high priority fix in the grand scheme of things, though.)
_foo: ## @foo
## BB#0: ## %entry
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
movdqa %xmm0, %xmm2
pextrb $1, %xmm2, %eax
pextrb $1, %xmm1, %ecx
mulb %cl
pextrb $0, %xmm2, %ecx
pextrb $0, %xmm1, %edx
movzbl %al, %esi
movb %cl, %al
mulb %dl
movzbl %al, %eax
movd %eax, %xmm0
pextrb $2, %xmm2, %eax
pextrb $2, %xmm1, %ecx
pinsrb $1, %esi, %xmm0
mulb %cl
movb %al, %cl
pextrb $3, %xmm2, %eax
pextrb $3, %xmm1, %edx
mulb %dl
movb %al, %dl
movzbl %cl, %ecx
pextrb $4, %xmm2, %eax
pextrb $4, %xmm1, %esi
pinsrb $2, %ecx, %xmm0
mulb %sil
movzbl %dl, %ecx
pextrb $11, %xmm2, %edx
pextrb $12, %xmm2, %esi
pextrb $13, %xmm2, %edi
pextrb $14, %xmm2, %r8d
movl %r8d, -4(%rsp) ## 4-byte Spill
pextrb $5, %xmm1, %r9d
pextrb $5, %xmm2, %r10d
pextrb $8, %xmm1, %r11d
pinsrb $3, %ecx, %xmm0
movzbl %al, %ecx
pextrb $15, %xmm2, %ebx
pextrb $8, %xmm2, %r14d
pextrb $12, %xmm1, %r15d
movb %r10b, %al
pextrb $13, %xmm1, %r10d
pinsrb $4, %ecx, %xmm0
pextrb $14, %xmm1, %ecx
pextrb $15, %xmm1, %r12d
mulb %r9b
movb %al, %r9b
pextrb $11, %xmm1, %r13d
pextrb $10, %xmm2, %ebp
movb %r14b, %al
mulb %r11b
movb %al, %r11b
pextrb $9, %xmm2, %eax
pextrb $9, %xmm1, %r14d
mulb %r14b
movb %al, %r14b
pextrb $10, %xmm1, %r8d
movb %bpl, %al
mulb %r8b
movb %al, %r8b
movb %dl, %al
mulb %r13b
movb %al, %dl
movb %sil, %al
mulb %r15b
movb %al, %sil
movb %dil, %al
mulb %r10b
movb %al, %dil
movl -4(%rsp), %eax ## 4-byte Reload
mulb %cl
movb %al, %cl
movb %bl, %al
mulb %r12b
movb %al, %r10b
movzbl %r9b, %r9d
pextrb $7, %xmm2, %eax
pextrb $7, %xmm1, %ebx
mulb %bl
pinsrb $5, %r9d, %xmm0
movzbl %r10b, %r9d
movzbl %cl, %ecx
movzbl %dil, %edi
movzbl %sil, %esi
movzbl %dl, %edx
movzbl %r8b, %r8d
movzbl %r14b, %r10d
movzbl %r11b, %r11d
movzbl %al, %ebx
pextrb $6, %xmm2, %eax
pextrb $6, %xmm1, %r14d
mulb %r14b
movzbl %al, %eax
pinsrb $6, %eax, %xmm0
pinsrb $7, %ebx, %xmm0
pinsrb $8, %r11d, %xmm0
pinsrb $9, %r10d, %xmm0
pinsrb $10, %r8d, %xmm0
pinsrb $11, %edx, %xmm0
pinsrb $12, %esi, %xmm0
pinsrb $13, %edi, %xmm0
pinsrb $14, %ecx, %xmm0
pinsrb $15, %r9d, %xmm0
paddb %xmm2, %xmm0
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
ret
If I explicitly extract the values from the vector, do the math, and repack,
like this:
define <4 x i8> @bar(<4 x i8> %x, <4 x i8> %y, <4 x i8> %__mask) nounwind
readnone alwaysinline {
entry:
%x0 = extractelement <4 x i8> %x, i32 0
%x1 = extractelement <4 x i8> %x, i32 1
%x2 = extractelement <4 x i8> %x, i32 2
%x3 = extractelement <4 x i8> %x, i32 3
%y0 = extractelement <4 x i8> %y, i32 0
%y1 = extractelement <4 x i8> %y, i32 1
%y2 = extractelement <4 x i8> %y, i32 2
%y3 = extractelement <4 x i8> %y, i32 3
%m0 = mul i8 %x0, %y0
%m1 = mul i8 %x1, %y1
%m2 = mul i8 %x2, %y2
%m3 = mul i8 %x3, %y3
%a0 = add i8 %m0, %x0
%a1 = add i8 %m1, %x1
%a2 = add i8 %m2, %x2
%a3 = add i8 %m3, %x3
%r0 = insertelement <4 x i8> undef, i8 %a0, i32 0
%r1 = insertelement <4 x i8> %r0, i8 %a1, i32 1
%r2 = insertelement <4 x i8> %r1, i8 %a2, i32 2
%r3 = insertelement <4 x i8> %r2, i8 %a3, i32 3
ret <4 x i8> %r3
}
The code is better:
_bar: ## @bar
## BB#0: ## %entry
pextrb $2, %xmm0, %ecx
pextrb $2, %xmm1, %edx
movb %cl, %al
mulb %dl
movb %al, %dl
addb %cl, %dl
pextrb $0, %xmm0, %ecx
pextrb $0, %xmm1, %esi
movb %cl, %al
mulb %sil
pextrb $3, %xmm0, %esi
movb %al, %dil
addb %cl, %dil
movzbl %dl, %ecx
pextrb $3, %xmm1, %edx
movb %sil, %al
mulb %dl
addb %sil, %al
movzbl %al, %edx
shll $8, %edx
pextrb $1, %xmm0, %esi
orl %ecx, %edx
movzbl %dil, %ecx
pextrb $1, %xmm1, %edi
movb %sil, %al
mulb %dil
addb %sil, %al
movzbl %al, %eax
shll $8, %eax
orl %ecx, %eax
pinsrw $0, %eax, %xmm0
pinsrw $1, %edx, %xmm0
ret
--
Configure bugmail: http://llvm.org/bugs/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
You are on the CC list for the bug.
More information about the llvm-bugs
mailing list