[llvm-bugs] [Bug 43688] New: optimize ctpop codegen by width and narrow ctpop intrinsic in IR
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed Oct 16 06:25:59 PDT 2019
https://bugs.llvm.org/show_bug.cgi?id=43688
Bug ID: 43688
Summary: optimize ctpop codegen by width and narrow ctpop
intrinsic in IR
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: Scalar Optimizations
Assignee: unassignedbugs at nondot.org
Reporter: spatel+llvm at rotateright.com
CC: llvm-bugs at lists.llvm.org
Spinning this off from bug 43656:
; zpop: the "wide" form. Zero-extends the i8 argument to i32 first, then
; counts the set bits with the 32-bit ctpop intrinsic and returns that count.
define i32 @zpop(i8 %x) {
; Widen before counting, so the intrinsic call operates on i32.
%z = zext i8 %x to i32
%pop = tail call i32 @llvm.ctpop.i32(i32 %z)
ret i32 %pop
}
; popz: the "narrow" form. Counts the set bits of the i8 argument with the
; 8-bit ctpop intrinsic, then zero-extends the i8 count to the i32 return type.
define i32 @popz(i8 %x) {
%pop = tail call i8 @llvm.ctpop.i8(i8 %x)
; Widen after counting, so the intrinsic call operates on i8.
%z = zext i8 %pop to i32
ret i32 %z
}
; Declarations of the 8-bit and 32-bit ctpop intrinsics called above.
declare i8 @llvm.ctpop.i8(i8)
declare i32 @llvm.ctpop.i32(i32)
--------------------------------------------------------------------------
These are equivalent, so we should try to canonicalize them in IR. The narrow
call is likely better for vectorization and would line up with our transforms
of most math/logic ops.
But we don't yet have the DAGCombiner and/or legalization support needed to
ensure that the narrow call is optimized in codegen.
For example on base x86-64:
## Codegen for @zpop (32-bit ctpop of a zero-extended byte).
## AT&T operand order: source first, destination last.
## In:  %dil = x (low byte of the first integer argument register)
## Out: %eax = number of set bits in x
## Writes only %eax, %ecx and the flags.
_zpop: ## @zpop
## eax = zero-extended x
movzbl %dil, %eax
## Step 1: 2-bit field sums, eax = v - ((v >> 1) & mask).
movl %eax, %ecx
shrl %ecx
## -43 is 0xFFFFFFD5. ecx is at most 0x7F here (a zero-extended byte shifted
## right by one), so this mask keeps the same bits as 0x55 would.
andl $-43, %ecx
subl %ecx, %eax
## Step 2: 4-bit field sums, eax = (v & 0x33333333) + ((v >> 2) & 0x33333333).
movl %eax, %ecx
andl $858993459, %ecx ## imm = 0x33333333
shrl $2, %eax
andl $858993459, %eax ## imm = 0x33333333
addl %ecx, %eax
## Step 3: per-byte sums, ecx = (v + (v >> 4)) & 0x0F0F0F0F.
movl %eax, %ecx
shrl $4, %ecx
addl %eax, %ecx
andl $252645135, %ecx ## imm = 0xF0F0F0F
## Step 4: multiplying by 0x01010101 accumulates all four byte sums into the
## top byte; shifting right by 24 moves that total down into the low byte.
imull $16843009, %ecx, %eax ## imm = 0x1010101
shrl $24, %eax
retq
## Codegen for @popz (8-bit ctpop followed by zero-extension to 32 bits).
## AT&T operand order: source first, destination last.
## In:  %dil = x (low byte of the first integer argument register)
## Out: %eax = number of set bits in x
## Writes %eax, %edi (the incoming argument register is modified) and the flags.
_popz: ## @popz
## Step 1: 2-bit field sums, dil = x - ((x >> 1) & 0x55).
movl %edi, %eax
shrb %al
andb $85, %al
subb %al, %dil
## Step 2: 4-bit field sums, dil = (v & 0x33) + ((v >> 2) & 0x33).
movl %edi, %eax
andb $51, %al
shrb $2, %dil
andb $51, %dil
addb %al, %dil
## Step 3: add the two nibbles, al = (v + (v >> 4)) & 0x0F.
## With a single byte there is nothing further to accumulate, so no multiply.
movl %edi, %eax
shrb $4, %al
addb %dil, %al
andb $15, %al
## Zero-extend the byte count to the 32-bit return value.
movzbl %al, %eax
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20191016/6403a60b/attachment.html>
More information about the llvm-bugs
mailing list