[llvm] 9ba2656 - [X86] ReplaceNodeResults - shrink i64 CTPOP to (shifted) CTPOP i32 if 32 or less active bits to avoid SSE2 codegen

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 9 04:24:37 PST 2024


Author: Simon Pilgrim
Date: 2024-02-09T12:24:09Z
New Revision: 9ba265636f3310b5b5b39767715e1843a06ea603

URL: https://github.com/llvm/llvm-project/commit/9ba265636f3310b5b5b39767715e1843a06ea603
DIFF: https://github.com/llvm/llvm-project/commit/9ba265636f3310b5b5b39767715e1843a06ea603.diff

LOG: [X86] ReplaceNodeResults - shrink i64 CTPOP to (shifted) CTPOP i32 if 32 or less active bits to avoid SSE2 codegen

32-bit targets perform i64 CTPOP as a v2i64 CTPOP - if we can perform this as an i32 CTPOP by shifting the source bits, then do so to avoid the gpr<->xmm transfers.

This also triggers on non-SSE2 capable targets, as can be seen from the minor codegen diffs in ctpop_shifted_mask16.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/ctpop-mask.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0c2d5f85bf26f3..18f9871b2bd0c3 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32109,6 +32109,20 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   case ISD::CTPOP: {
     assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    // If we have at most 32 active bits, then perform as i32 CTPOP.
+    // TODO: Perform this in generic legalizer?
+    KnownBits Known = DAG.computeKnownBits(N->getOperand(0));
+    unsigned LZ = Known.countMinLeadingZeros();
+    unsigned TZ = Known.countMinTrailingZeros();
+    if ((LZ + TZ) >= 32) {
+      SDValue Op = DAG.getNode(ISD::SRL, dl, MVT::i64, N->getOperand(0),
+                               DAG.getShiftAmountConstant(TZ, MVT::i64, dl));
+      Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Op);
+      Op = DAG.getNode(ISD::CTPOP, dl, MVT::i32, Op);
+      Op = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Op);
+      Results.push_back(Op);
+      return;
+    }
     // Use a v2i64 if possible.
     bool NoImplicitFloatOps =
         DAG.getMachineFunction().getFunction().hasFnAttribute(

diff  --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll
index 6d4fa4a4cdfdf4..97c634a2a133d7 100644
--- a/llvm/test/CodeGen/X86/ctpop-mask.ll
+++ b/llvm/test/CodeGen/X86/ctpop-mask.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s -check-prefixes=X86-POPCOUNT
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s -check-prefixes=X64-POPCOUNT
-; RUN: llc < %s -mtriple=i686-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT,X86-NO-SSE2
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=X64-NO-POPCOUNT
 
 declare i8 @llvm.ctpop.i8(i8) nounwind readnone
@@ -29,42 +29,17 @@ define i64 @ctpop_mask2(i64 %x) nounwind readnone {
 ; X64-POPCOUNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCOUNT-NEXT:    retq
 ;
-; X86-NO-SSE2-LABEL: ctpop_mask2:
-; X86-NO-SSE2:       # %bb.0:
-; X86-NO-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NO-SSE2-NEXT:    andl $3, %eax
-; X86-NO-SSE2-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-SSE2-NEXT:    shrl $3, %eax
-; X86-NO-SSE2-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X86-NO-SSE2-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    shrl $28, %eax
-; X86-NO-SSE2-NEXT:    xorl %edx, %edx
-; X86-NO-SSE2-NEXT:    retl
-;
-; X86-SSE2-LABEL: ctpop_mask2:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    andl $3, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlw $2, %xmm0
-; X86-SSE2-NEXT:    pand %xmm1, %xmm0
-; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $4, %xmm1
-; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
-; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    xorl %edx, %edx
-; X86-SSE2-NEXT:    retl
+; X86-NO-POPCOUNT-LABEL: ctpop_mask2:
+; X86-NO-POPCOUNT:       # %bb.0:
+; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT:    andl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
+; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    xorl %edx, %edx
+; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_mask2:
 ; X64-NO-POPCOUNT:       # %bb.0:
@@ -218,42 +193,17 @@ define i64 @ctpop_mask4(i64 %x) nounwind readnone {
 ; X64-POPCOUNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCOUNT-NEXT:    retq
 ;
-; X86-NO-SSE2-LABEL: ctpop_mask4:
-; X86-NO-SSE2:       # %bb.0:
-; X86-NO-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NO-SSE2-NEXT:    andl $15, %eax
-; X86-NO-SSE2-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-SSE2-NEXT:    shrl $3, %eax
-; X86-NO-SSE2-NEXT:    andl $17895697, %eax # imm = 0x1111111
-; X86-NO-SSE2-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    shrl $28, %eax
-; X86-NO-SSE2-NEXT:    xorl %edx, %edx
-; X86-NO-SSE2-NEXT:    retl
-;
-; X86-SSE2-LABEL: ctpop_mask4:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    andl $15, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlw $2, %xmm0
-; X86-SSE2-NEXT:    pand %xmm1, %xmm0
-; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $4, %xmm1
-; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
-; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    xorl %edx, %edx
-; X86-SSE2-NEXT:    retl
+; X86-NO-POPCOUNT-LABEL: ctpop_mask4:
+; X86-NO-POPCOUNT:       # %bb.0:
+; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT:    andl $15, %eax
+; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $17895697, %eax # imm = 0x1111111
+; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    xorl %edx, %edx
+; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_mask4:
 ; X64-NO-POPCOUNT:       # %bb.0:
@@ -325,42 +275,17 @@ define i64 @ctpop_mask5(i64 %x) nounwind readnone {
 ; X64-POPCOUNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCOUNT-NEXT:    retq
 ;
-; X86-NO-SSE2-LABEL: ctpop_mask5:
-; X86-NO-SSE2:       # %bb.0:
-; X86-NO-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NO-SSE2-NEXT:    andl $31, %eax
-; X86-NO-SSE2-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-SSE2-NEXT:    shrl $3, %eax
-; X86-NO-SSE2-NEXT:    andl $286331153, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    shrl $28, %eax
-; X86-NO-SSE2-NEXT:    xorl %edx, %edx
-; X86-NO-SSE2-NEXT:    retl
-;
-; X86-SSE2-LABEL: ctpop_mask5:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    andl $31, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlw $2, %xmm0
-; X86-SSE2-NEXT:    pand %xmm1, %xmm0
-; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $4, %xmm1
-; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
-; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    xorl %edx, %edx
-; X86-SSE2-NEXT:    retl
+; X86-NO-POPCOUNT-LABEL: ctpop_mask5:
+; X86-NO-POPCOUNT:       # %bb.0:
+; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT:    andl $31, %eax
+; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    xorl %edx, %edx
+; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_mask5:
 ; X64-NO-POPCOUNT:       # %bb.0:
@@ -471,43 +396,18 @@ define i64 @ctpop_shifted_mask6(i64 %x) nounwind readnone {
 ; X64-POPCOUNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCOUNT-NEXT:    retq
 ;
-; X86-NO-SSE2-LABEL: ctpop_shifted_mask6:
-; X86-NO-SSE2:       # %bb.0:
-; X86-NO-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NO-SSE2-NEXT:    shrl $9, %eax
-; X86-NO-SSE2-NEXT:    andl $51, %eax
-; X86-NO-SSE2-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-SSE2-NEXT:    shrl $3, %eax
-; X86-NO-SSE2-NEXT:    andl $286331153, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    shrl $28, %eax
-; X86-NO-SSE2-NEXT:    xorl %edx, %edx
-; X86-NO-SSE2-NEXT:    retl
-;
-; X86-SSE2-LABEL: ctpop_shifted_mask6:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movl $26112, %eax # imm = 0x6600
-; X86-SSE2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlw $2, %xmm0
-; X86-SSE2-NEXT:    pand %xmm1, %xmm0
-; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $4, %xmm1
-; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
-; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    xorl %edx, %edx
-; X86-SSE2-NEXT:    retl
+; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
+; X86-NO-POPCOUNT:       # %bb.0:
+; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT:    shrl $9, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $51, %eax
+; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    xorl %edx, %edx
+; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
 ; X64-NO-POPCOUNT:       # %bb.0:
@@ -660,41 +560,16 @@ define i64 @ctpop_shifted_mask8(i64 %x) nounwind readnone {
 ; X64-POPCOUNT-NEXT:    popcntl %edi, %eax
 ; X64-POPCOUNT-NEXT:    retq
 ;
-; X86-NO-SSE2-LABEL: ctpop_shifted_mask8:
-; X86-NO-SSE2:       # %bb.0:
-; X86-NO-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NO-SSE2-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-SSE2-NEXT:    shrl $3, %eax
-; X86-NO-SSE2-NEXT:    andl $286331153, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-SSE2-NEXT:    shrl $28, %eax
-; X86-NO-SSE2-NEXT:    xorl %edx, %edx
-; X86-NO-SSE2-NEXT:    retl
-;
-; X86-SSE2-LABEL: ctpop_shifted_mask8:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    shll $8, %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlw $2, %xmm0
-; X86-SSE2-NEXT:    pand %xmm1, %xmm0
-; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $4, %xmm1
-; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
-; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    xorl %edx, %edx
-; X86-SSE2-NEXT:    retl
+; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
+; X86-NO-POPCOUNT:       # %bb.0:
+; X86-NO-POPCOUNT-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT:    imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT:    shrl $28, %eax
+; X86-NO-POPCOUNT-NEXT:    xorl %edx, %edx
+; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
 ; X64-NO-POPCOUNT:       # %bb.0:
@@ -783,53 +658,28 @@ define i64 @ctpop_shifted_mask16(i64 %x) nounwind readnone {
 ; X64-POPCOUNT-NEXT:    popcntq %rax, %rax
 ; X64-POPCOUNT-NEXT:    retq
 ;
-; X86-NO-SSE2-LABEL: ctpop_shifted_mask16:
-; X86-NO-SSE2:       # %bb.0:
-; X86-NO-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-SSE2-NEXT:    movl %ecx, %eax
-; X86-NO-SSE2-NEXT:    andl $524280, %eax # imm = 0x7FFF8
-; X86-NO-SSE2-NEXT:    shrl %ecx
-; X86-NO-SSE2-NEXT:    andl $87380, %ecx # imm = 0x15554
-; X86-NO-SSE2-NEXT:    subl %ecx, %eax
-; X86-NO-SSE2-NEXT:    movl %eax, %ecx
-; X86-NO-SSE2-NEXT:    andl $858993456, %ecx # imm = 0x33333330
-; X86-NO-SSE2-NEXT:    shrl $2, %eax
-; X86-NO-SSE2-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NO-SSE2-NEXT:    addl %ecx, %eax
-; X86-NO-SSE2-NEXT:    movl %eax, %ecx
-; X86-NO-SSE2-NEXT:    shrl $4, %ecx
-; X86-NO-SSE2-NEXT:    addl %eax, %ecx
-; X86-NO-SSE2-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-SSE2-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-SSE2-NEXT:    shrl $24, %eax
-; X86-NO-SSE2-NEXT:    xorl %edx, %edx
-; X86-NO-SSE2-NEXT:    retl
-;
-; X86-SSE2-LABEL: ctpop_shifted_mask16:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movl $524280, %eax # imm = 0x7FFF8
-; X86-SSE2-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    movd %eax, %xmm0
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    psubb %xmm1, %xmm0
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    pand %xmm1, %xmm2
-; X86-SSE2-NEXT:    psrlw $2, %xmm0
-; X86-SSE2-NEXT:    pand %xmm1, %xmm0
-; X86-SSE2-NEXT:    paddb %xmm2, %xmm0
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
-; X86-SSE2-NEXT:    psrlw $4, %xmm1
-; X86-SSE2-NEXT:    paddb %xmm0, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm0, %xmm0
-; X86-SSE2-NEXT:    psadbw %xmm1, %xmm0
-; X86-SSE2-NEXT:    movd %xmm0, %eax
-; X86-SSE2-NEXT:    xorl %edx, %edx
-; X86-SSE2-NEXT:    retl
+; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask16:
+; X86-NO-POPCOUNT:       # %bb.0:
+; X86-NO-POPCOUNT-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-POPCOUNT-NEXT:    movl %ecx, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $524280, %eax # imm = 0x7FFF8
+; X86-NO-POPCOUNT-NEXT:    shrl $4, %ecx
+; X86-NO-POPCOUNT-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X86-NO-POPCOUNT-NEXT:    shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT:    subl %ecx, %eax
+; X86-NO-POPCOUNT-NEXT:    movl %eax, %ecx
+; X86-NO-POPCOUNT-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NO-POPCOUNT-NEXT:    shrl $2, %eax
+; X86-NO-POPCOUNT-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NO-POPCOUNT-NEXT:    addl %ecx, %eax
+; X86-NO-POPCOUNT-NEXT:    movl %eax, %ecx
+; X86-NO-POPCOUNT-NEXT:    shrl $4, %ecx
+; X86-NO-POPCOUNT-NEXT:    addl %eax, %ecx
+; X86-NO-POPCOUNT-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NO-POPCOUNT-NEXT:    imull $16843009, %ecx, %eax # imm = 0x1010101
+; X86-NO-POPCOUNT-NEXT:    shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT:    xorl %edx, %edx
+; X86-NO-POPCOUNT-NEXT:    retl
 ;
 ; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask16:
 ; X64-NO-POPCOUNT:       # %bb.0:


        


More information about the llvm-commits mailing list