[llvm] 3a75807 - [X86] Allow i8 CTPOP expansion to work with a 'shifted' active bits value of 8 bits or less
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 10:13:54 PST 2024
Author: Simon Pilgrim
Date: 2024-02-02T18:03:20Z
New Revision: 3a758076f54d521d20e32856a62c7d24803ee9e8
URL: https://github.com/llvm/llvm-project/commit/3a758076f54d521d20e32856a62c7d24803ee9e8
DIFF: https://github.com/llvm/llvm-project/commit/3a758076f54d521d20e32856a62c7d24803ee9e8.diff
LOG: [X86] Allow i8 CTPOP expansion to work with a 'shifted' active bits value of 8 bits or less
Shift the value down so that its active bits start at the LSB, allowing the i8 expansion to be used whenever the span of active bits is 8 bits or fewer.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/ctpop-mask.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b6468a9b18e76..35613e665266a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31037,12 +31037,22 @@ static SDValue LowerCTPOP(SDValue N, const X86Subtarget &Subtarget,
SDLoc DL(N);
if (VT.isScalarInteger()) {
+ // Compute the lower/upper bounds of the active bits of the value,
+ // allowing us to shift the active bits down if necessary to fit into the
+ // special cases below.
KnownBits Known = DAG.computeKnownBits(Op);
- unsigned ActiveBits = Known.countMaxActiveBits();
+ unsigned LZ = Known.countMinLeadingZeros();
+ unsigned TZ = Known.countMinTrailingZeros();
+ assert((LZ + TZ) < Known.getBitWidth() && "Illegal shifted mask");
+ unsigned ActiveBits = Known.getBitWidth() - LZ;
+ unsigned ShiftedActiveBits = Known.getBitWidth() - (LZ + TZ);
// i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
- if (ActiveBits <= 8) {
+ if (ShiftedActiveBits <= 8) {
SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);
+ if (ActiveBits > 8)
+ Op = DAG.getNode(ISD::SRL, DL, VT, Op,
+ DAG.getShiftAmountConstant(TZ, VT, DL));
Op = DAG.getZExtOrTrunc(Op, DL, MVT::i32);
Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
DAG.getConstant(0x08040201U, DL, MVT::i32));
diff --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll
index 1af964dd11c66..abbcf22f77e43 100644
--- a/llvm/test/CodeGen/X86/ctpop-mask.ll
+++ b/llvm/test/CodeGen/X86/ctpop-mask.ll
@@ -70,43 +70,25 @@ define i32 @ctpop_shifted_mask2(i32 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask2:
; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: andl $1572864, %eax # imm = 0x180000
-; X86-NO-POPCOUNT-NEXT: shrl %ecx
-; X86-NO-POPCOUNT-NEXT: andl $262144, %ecx # imm = 0x40000
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858783744, %ecx # imm = 0x33300000
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858980352, %eax # imm = 0x33330000
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT: shrl $19, %eax
+; X86-NO-POPCOUNT-NEXT: andl $3, %eax
+; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
+; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask2:
; X64-NO-POPCOUNT: # %bb.0:
-; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: andl $1572864, %eax # imm = 0x180000
-; X64-NO-POPCOUNT-NEXT: shrl %edi
-; X64-NO-POPCOUNT-NEXT: andl $262144, %edi # imm = 0x40000
-; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $858783744, %ecx # imm = 0x33300000
-; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X64-NO-POPCOUNT-NEXT: andl $858980352, %eax # imm = 0x33330000
-; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X64-NO-POPCOUNT-NEXT: shrl $19, %edi
+; X64-NO-POPCOUNT-NEXT: andl $3, %edi
+; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
+; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i32 %x, 1572864 ; 3 << 19
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -252,43 +234,25 @@ define i32 @ctpop_shifted_mask4(i32 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: andl $7680, %eax # imm = 0x1E00
-; X86-NO-POPCOUNT-NEXT: shrl %ecx
-; X86-NO-POPCOUNT-NEXT: andl $1280, %ecx # imm = 0x500
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
+; X86-NO-POPCOUNT-NEXT: andl $15, %eax
+; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
+; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask4:
; X64-NO-POPCOUNT: # %bb.0:
-; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: andl $7680, %eax # imm = 0x1E00
-; X64-NO-POPCOUNT-NEXT: shrl %edi
-; X64-NO-POPCOUNT-NEXT: andl $1280, %edi # imm = 0x500
-; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
-; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X64-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
+; X64-NO-POPCOUNT-NEXT: andl $15, %edi
+; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X64-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
+; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i32 %x, 7680 ; 15 << 9
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -352,43 +316,25 @@ define i32 @ctpop_shifted_mask5(i32 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask5:
; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: andl $11776, %eax # imm = 0x2E00
-; X86-NO-POPCOUNT-NEXT: shrl %ecx
-; X86-NO-POPCOUNT-NEXT: andl $5376, %ecx # imm = 0x1500
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
+; X86-NO-POPCOUNT-NEXT: andl $23, %eax
+; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask5:
; X64-NO-POPCOUNT: # %bb.0:
-; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: andl $11776, %eax # imm = 0x2E00
-; X64-NO-POPCOUNT-NEXT: shrl %edi
-; X64-NO-POPCOUNT-NEXT: andl $5376, %edi # imm = 0x1500
-; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
-; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X64-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
+; X64-NO-POPCOUNT-NEXT: andl $23, %edi
+; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i32 %x, 11776 ; 23 << 9
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -451,47 +397,26 @@ define i64 @ctpop_shifted_mask6(i64 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: andl $26112, %eax # imm = 0x6600
-; X86-NO-POPCOUNT-NEXT: shrl %ecx
-; X86-NO-POPCOUNT-NEXT: andl $4352, %ecx # imm = 0x1100
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
+; X86-NO-POPCOUNT-NEXT: andl $51, %eax
+; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
; X64-NO-POPCOUNT: # %bb.0:
-; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: andl $26112, %eax # imm = 0x6600
-; X64-NO-POPCOUNT-NEXT: shrl %edi
-; X64-NO-POPCOUNT-NEXT: andl $4352, %edi # imm = 0x1100
-; X64-NO-POPCOUNT-NEXT: subq %rdi, %rax
-; X64-NO-POPCOUNT-NEXT: movabsq $3689348814741910272, %rcx # imm = 0x3333333333333300
-; X64-NO-POPCOUNT-NEXT: movq %rax, %rdx
-; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
-; X64-NO-POPCOUNT-NEXT: shrq $2, %rax
-; X64-NO-POPCOUNT-NEXT: andq %rcx, %rax
-; X64-NO-POPCOUNT-NEXT: addq %rdx, %rax
-; X64-NO-POPCOUNT-NEXT: movq %rax, %rcx
-; X64-NO-POPCOUNT-NEXT: shrq $4, %rcx
-; X64-NO-POPCOUNT-NEXT: addq %rax, %rcx
-; X64-NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
-; X64-NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; X64-NO-POPCOUNT-NEXT: imulq %rdx, %rax
-; X64-NO-POPCOUNT-NEXT: shrq $56, %rax
+; X64-NO-POPCOUNT-NEXT: shrl $9, %edi
+; X64-NO-POPCOUNT-NEXT: andl $51, %edi
+; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i64 %x, 26112 ; 51 << 9
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)
@@ -557,43 +482,25 @@ define i32 @ctpop_shift_mask7(i32 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shift_mask7:
; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: andl $1040384, %eax # imm = 0xFE000
-; X86-NO-POPCOUNT-NEXT: shrl %ecx
-; X86-NO-POPCOUNT-NEXT: andl $348160, %ecx # imm = 0x55000
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858992640, %ecx # imm = 0x33333000
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858992640, %eax # imm = 0x33333000
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT: shrl $13, %eax
+; X86-NO-POPCOUNT-NEXT: andl $127, %eax
+; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shift_mask7:
; X64-NO-POPCOUNT: # %bb.0:
-; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: andl $1040384, %eax # imm = 0xFE000
-; X64-NO-POPCOUNT-NEXT: shrl %edi
-; X64-NO-POPCOUNT-NEXT: andl $348160, %edi # imm = 0x55000
-; X64-NO-POPCOUNT-NEXT: subl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $858992640, %ecx # imm = 0x33333000
-; X64-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X64-NO-POPCOUNT-NEXT: andl $858992640, %eax # imm = 0x33333000
-; X64-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X64-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X64-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X64-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X64-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X64-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X64-NO-POPCOUNT-NEXT: shrl $13, %edi
+; X64-NO-POPCOUNT-NEXT: andl $127, %edi
+; X64-NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i32 %x, 1040384 ; 127 << 13
%count = tail call i32 @llvm.ctpop.i32(i32 %mask)
@@ -654,47 +561,24 @@ define i64 @ctpop_shifted_mask8(i64 %x) nounwind readnone {
;
; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: shll $8, %eax
-; X86-NO-POPCOUNT-NEXT: shll $7, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $21760, %ecx # imm = 0x5500
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %ecx # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858993408, %eax # imm = 0x33333300
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
+; X86-NO-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
; X86-NO-POPCOUNT-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
; X64-NO-POPCOUNT: # %bb.0:
-; X64-NO-POPCOUNT-NEXT: movl %edi, %eax
-; X64-NO-POPCOUNT-NEXT: andl $65280, %eax # imm = 0xFF00
-; X64-NO-POPCOUNT-NEXT: shrl %edi
-; X64-NO-POPCOUNT-NEXT: andl $21760, %edi # imm = 0x5500
-; X64-NO-POPCOUNT-NEXT: subq %rdi, %rax
-; X64-NO-POPCOUNT-NEXT: movabsq $3689348814741910272, %rcx # imm = 0x3333333333333300
-; X64-NO-POPCOUNT-NEXT: movq %rax, %rdx
-; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
-; X64-NO-POPCOUNT-NEXT: shrq $2, %rax
-; X64-NO-POPCOUNT-NEXT: andq %rcx, %rax
-; X64-NO-POPCOUNT-NEXT: addq %rdx, %rax
-; X64-NO-POPCOUNT-NEXT: movq %rax, %rcx
-; X64-NO-POPCOUNT-NEXT: shrq $4, %rcx
-; X64-NO-POPCOUNT-NEXT: addq %rax, %rcx
-; X64-NO-POPCOUNT-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NO-POPCOUNT-NEXT: andq %rcx, %rdx
-; X64-NO-POPCOUNT-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; X64-NO-POPCOUNT-NEXT: imulq %rdx, %rax
-; X64-NO-POPCOUNT-NEXT: shrq $56, %rax
+; X64-NO-POPCOUNT-NEXT: movq %rdi, %rax
+; X64-NO-POPCOUNT-NEXT: movzbl %ah, %eax
+; X64-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NO-POPCOUNT-NEXT: shrl $3, %eax
+; X64-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NO-POPCOUNT-NEXT: shrl $28, %eax
; X64-NO-POPCOUNT-NEXT: retq
%mask = and i64 %x, 65280 ; 255 << 8
%count = tail call i64 @llvm.ctpop.i64(i64 %mask)
More information about the llvm-commits mailing list