[llvm] [DAG] Add generic i8 CTPOP lowering using i32 MUL (PR #79989)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 30 03:52:34 PST 2024
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/79989
Fixes #79823
From bb8ba816b4a1bf0ae485deb9ff1111a5d47674f8 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 30 Jan 2024 11:50:44 +0000
Subject: [PATCH] [DAG] Add generic i8 CTPOP lowering using i32 MUL
Fixes #79823
---
.../CodeGen/SelectionDAG/TargetLowering.cpp | 15 +++++
llvm/test/CodeGen/X86/ctpop-combine.ll | 21 +++----
llvm/test/CodeGen/X86/popcnt.ll | 58 +++++++------------
3 files changed, 43 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b8ed02e268b1..2eb68485c777 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8639,6 +8639,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
return SDValue();
+ if (VT == MVT::i8 && isOperationLegal(ISD::SRL, MVT::i32) &&
+ isOperationLegal(ISD::MUL, MVT::i32)) {
+ SDValue Mask11 = DAG.getConstant(0x11111111U, dl, MVT::i32);
+ Op = DAG.getZExtOrTrunc(Op, dl, MVT::i32);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op,
+ DAG.getConstant(0x08040201U, dl, MVT::i32));
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(3, MVT::i32, dl));
+ Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::MUL, dl, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::SRL, dl, MVT::i32, Op,
+ DAG.getShiftAmountConstant(28, MVT::i32, dl));
+ return DAG.getZExtOrTrunc(Op, dl, MVT::i8);
+ }
+
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
SDValue Mask55 =
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index fba44218e057..73152e9f909c 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -88,20 +88,13 @@ define i8 @test4(i8 %x) nounwind readnone {
;
; NO-POPCOUNT-LABEL: test4:
; NO-POPCOUNT: # %bb.0:
-; NO-POPCOUNT-NEXT: movl %edi, %ecx
-; NO-POPCOUNT-NEXT: andb $127, %cl
-; NO-POPCOUNT-NEXT: shrb %dil
-; NO-POPCOUNT-NEXT: andb $21, %dil
-; NO-POPCOUNT-NEXT: subb %dil, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: andb $51, %al
-; NO-POPCOUNT-NEXT: shrb $2, %cl
-; NO-POPCOUNT-NEXT: andb $51, %cl
-; NO-POPCOUNT-NEXT: addb %al, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: shrb $4, %al
-; NO-POPCOUNT-NEXT: addb %cl, %al
-; NO-POPCOUNT-NEXT: andb $15, %al
+; NO-POPCOUNT-NEXT: andl $127, %edi
+; NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; NO-POPCOUNT-NEXT: shrl $3, %eax
+; NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: shrl $28, %eax
+; NO-POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
; NO-POPCOUNT-NEXT: retq
%x2 = and i8 %x, 127
%count = tail call i8 @llvm.ctpop.i8(i8 %x2)
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index a9d77fd2c0a6..c8d060dfee18 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -10,37 +10,24 @@
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb %al
-; X86-NEXT: andb $85, %al
-; X86-NEXT: subb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andb $51, %al
-; X86-NEXT: shrb $2, %cl
-; X86-NEXT: andb $51, %cl
-; X86-NEXT: addb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $4, %al
-; X86-NEXT: addb %cl, %al
-; X86-NEXT: andb $15, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NEXT: shrl $28, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cnt8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb $85, %al
-; X64-NEXT: subb %al, %dil
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andb $51, %cl
-; X64-NEXT: shrb $2, %dil
-; X64-NEXT: andb $51, %dil
-; X64-NEXT: addb %dil, %cl
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: shrb $4, %al
-; X64-NEXT: addb %cl, %al
-; X64-NEXT: andb $15, %al
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NEXT: shrl $3, %eax
+; X64-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NEXT: shrl $28, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-POPCNT-LABEL: cnt8:
@@ -59,16 +46,13 @@ define i8 @cnt8(i8 %x) nounwind readnone {
;
; X64-NDD-LABEL: cnt8:
; X64-NDD: # %bb.0:
-; X64-NDD-NEXT: shrb %dil, %al
-; X64-NDD-NEXT: andb $85, %al
-; X64-NDD-NEXT: subb %al, %dil, %al
-; X64-NDD-NEXT: andb $51, %al, %cl
-; X64-NDD-NEXT: shrb $2, %al
-; X64-NDD-NEXT: andb $51, %al
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: shrb $4, %al, %cl
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: andb $15, %al
+; X64-NDD-NEXT: movzbl %dil, %eax
+; X64-NDD-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NDD-NEXT: shrl $3, %eax
+; X64-NDD-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NDD-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NDD-NEXT: shrl $28, %eax
+; X64-NDD-NEXT: # kill: def $al killed $al killed $eax
; X64-NDD-NEXT: retq
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
ret i8 %cnt
More information about the llvm-commits mailing list