[llvm] b8c9b06 - [X86] LowerCTPOP - add i3 and i4 LUT 'shift+mask' expansions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 21 05:55:46 PST 2024


Author: Simon Pilgrim
Date: 2024-02-21T13:53:47Z
New Revision: b8c9b0613465b2770d2ae7f61364ddce6bba4511

URL: https://github.com/llvm/llvm-project/commit/b8c9b0613465b2770d2ae7f61364ddce6bba4511
DIFF: https://github.com/llvm/llvm-project/commit/b8c9b0613465b2770d2ae7f61364ddce6bba4511.diff

LOG: [X86] LowerCTPOP - add i3 and i4 LUT 'shift+mask' expansions

Use the 3 or 4 active bits (scaled by the table entry width) as a shift amount into an i32/i64 constant that packs the number of set bits for each possible input value.

In the future, it might be worthwhile to move these expansions into a generic location in case other targets want to make use of them.

Another expansion pulled from #79823

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/ctpop-mask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 834b470a4a867c..a86f13135173b0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31065,6 +31065,38 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
       return DAG.getZExtOrTrunc(Op, DL, VT);
     }
 
+    // i3 CTPOP - perform LUT into i32 integer.
+    if (ShiftedActiveBits <= 3) {
+      if (ActiveBits > 3)
+        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+                         DAG.getShiftAmountConstant(TZ, VT, DL));
+      Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
+      Op = DAG.getNode(ISD::SHL, DL, MVT::i32, Op,
+                       DAG.getShiftAmountConstant(1, VT, DL));
+      Op = DAG.getNode(ISD::SRL, DL, MVT::i32,
+                       DAG.getConstant(0b1110100110010100U, DL, MVT::i32), Op);
+      Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
+                       DAG.getConstant(0x3, DL, MVT::i32));
+      return DAG.getZExtOrTrunc(Op, DL, VT);
+    }
+
+    // i4 CTPOP - perform LUT into i64 integer.
+    if (ShiftedActiveBits <= 4 &&
+        DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64)) {
+      SDValue LUT = DAG.getConstant(0x4332322132212110ULL, DL, MVT::i64);
+      if (ActiveBits > 4)
+        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+                         DAG.getShiftAmountConstant(TZ, VT, DL));
+      Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
+      Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
+                       DAG.getConstant(4, DL, MVT::i32));
+      Op = DAG.getNode(ISD::SRL, DL, MVT::i64, LUT,
+                       DAG.getShiftAmountOperand(MVT::i64, Op));
+      Op = DAG.getNode(ISD::AND, DL, MVT::i64, Op,
+                       DAG.getConstant(0x7, DL, MVT::i64));
+      return DAG.getZExtOrTrunc(Op, DL, VT);
+    }
+
     // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
     if (ShiftedActiveBits <= 8) {
       SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);

diff  --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll
index 4b03563fd9924a..a43dba94d30c78 100644
--- a/llvm/test/CodeGen/X86/ctpop-mask.ll
+++ b/llvm/test/CodeGen/X86/ctpop-mask.ll
@@ -106,23 +106,24 @@ define i32 @ctpop_mask3(i32 %x) nounwind readnone {
 ;
 ; X86-NO-POPCOUNT-LABEL: ctpop_mask3:
 ; X86-NO-POPCOUNT:       # %bb.0:
-; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT:    andl $5, %eax
-; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-POPCOUNT-NEXT:    andl $5, %ecx
+; X86-NO-POPCOUNT-NEXT:    addl %ecx, %ecx
+; X86-NO-POPCOUNT-NEXT:    movl $59796, %eax # imm = 0xE994
+; X86-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NO-POPCOUNT-NEXT:    shrl %cl, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $3, %eax
 ; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_mask3:
 ; X64-NO-POPCOUNT:       # %bb.0:
+; X64-NO-POPCOUNT-NEXT:    # kill: def $edi killed $edi def $rdi
 ; X64-NO-POPCOUNT-NEXT:    andl $5, %edi
-; X64-NO-POPCOUNT-NEXT:    imull $134480385, %edi, %eax # imm = 0x8040201
-; X64-NO-POPCOUNT-NEXT:    shrl $3, %eax
-; X64-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X64-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X64-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X64-NO-POPCOUNT-NEXT:    leal (%rdi,%rdi), %ecx
+; X64-NO-POPCOUNT-NEXT:    movl $59796, %eax # imm = 0xE994
+; X64-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-POPCOUNT-NEXT:    shrl %cl, %eax
+; X64-NO-POPCOUNT-NEXT:    andl $3, %eax
 ; X64-NO-POPCOUNT-NEXT:    retq
   %mask = and i32 %x, 5 ; 0b101
   %count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -147,24 +148,23 @@ define i16 @ctpop_shifted_mask3(i16 %x) nounwind readnone {
 ;
 ; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
 ; X86-NO-POPCOUNT:       # %bb.0:
-; X86-NO-POPCOUNT-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT:    andl $14, %eax
-; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NO-POPCOUNT-NEXT:    andl $14, %ecx
+; X86-NO-POPCOUNT-NEXT:    movl $59796, %eax # imm = 0xE994
+; X86-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X86-NO-POPCOUNT-NEXT:    shrl %cl, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $3, %eax
 ; X86-NO-POPCOUNT-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask3:
 ; X64-NO-POPCOUNT:       # %bb.0:
-; X64-NO-POPCOUNT-NEXT:    andl $14, %edi
-; X64-NO-POPCOUNT-NEXT:    imull $134480385, %edi, %eax # imm = 0x8040201
-; X64-NO-POPCOUNT-NEXT:    shrl $3, %eax
-; X64-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X64-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X64-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X64-NO-POPCOUNT-NEXT:    movl %edi, %ecx
+; X64-NO-POPCOUNT-NEXT:    andl $14, %ecx
+; X64-NO-POPCOUNT-NEXT:    movl $59796, %eax # imm = 0xE994
+; X64-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-POPCOUNT-NEXT:    shrl %cl, %eax
+; X64-NO-POPCOUNT-NEXT:    andl $3, %eax
 ; X64-NO-POPCOUNT-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NO-POPCOUNT-NEXT:    retq
   %mask = and i16 %x, 14 ; 7 << 1
@@ -202,11 +202,11 @@ define i64 @ctpop_mask4(i64 %x) nounwind readnone {
 ; X64-NO-POPCOUNT-LABEL: ctpop_mask4:
 ; X64-NO-POPCOUNT:       # %bb.0:
 ; X64-NO-POPCOUNT-NEXT:    andl $15, %edi
-; X64-NO-POPCOUNT-NEXT:    imull $134480385, %edi, %eax # imm = 0x8040201
-; X64-NO-POPCOUNT-NEXT:    shrl $3, %eax
-; X64-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X64-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X64-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X64-NO-POPCOUNT-NEXT:    leal (,%rdi,4), %ecx
+; X64-NO-POPCOUNT-NEXT:    movabsq $4841987667533046032, %rax # imm = 0x4332322132212110
+; X64-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-POPCOUNT-NEXT:    shrq %cl, %rax
+; X64-NO-POPCOUNT-NEXT:    andl $7, %eax
 ; X64-NO-POPCOUNT-NEXT:    retq
   %mask = and i64 %x, 15
   %count = tail call i64 @llvm.ctpop.i64(i64 %mask)
@@ -241,13 +241,14 @@ define i32 @ctpop_shifted_mask4(i32 %x) nounwind readnone {
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
 ; X64-NO-POPCOUNT:       # %bb.0:
-; X64-NO-POPCOUNT-NEXT:    shrl $9, %edi
-; X64-NO-POPCOUNT-NEXT:    andl $15, %edi
-; X64-NO-POPCOUNT-NEXT:    imull $134480385, %edi, %eax # imm = 0x8040201
-; X64-NO-POPCOUNT-NEXT:    shrl $3, %eax
-; X64-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X64-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X64-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X64-NO-POPCOUNT-NEXT:    movl %edi, %ecx
+; X64-NO-POPCOUNT-NEXT:    shrl $7, %ecx
+; X64-NO-POPCOUNT-NEXT:    andl $60, %ecx
+; X64-NO-POPCOUNT-NEXT:    movabsq $4841987667533046032, %rax # imm = 0x4332322132212110
+; X64-NO-POPCOUNT-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NO-POPCOUNT-NEXT:    shrq %cl, %rax
+; X64-NO-POPCOUNT-NEXT:    andl $7, %eax
+; X64-NO-POPCOUNT-NEXT:    # kill: def $eax killed $eax killed $rax
 ; X64-NO-POPCOUNT-NEXT:    retq
   %mask = and i32 %x, 7680 ; 15 << 9
   %count = tail call i32 @llvm.ctpop.i32(i32 %mask)


        


More information about the llvm-commits mailing list