[llvm] 047f832 - [X86] ctpop-mask.ll - add 32-bit with SSE2 test coverage
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 04:24:36 PST 2024
Author: Simon Pilgrim
Date: 2024-02-09T12:24:09Z
New Revision: 047f8321f14a53caad7b564f7f654a470fdca8a9
URL: https://github.com/llvm/llvm-project/commit/047f8321f14a53caad7b564f7f654a470fdca8a9
DIFF: https://github.com/llvm/llvm-project/commit/047f8321f14a53caad7b564f7f654a470fdca8a9.diff
LOG: [X86] ctpop-mask.ll - add 32-bit with SSE2 test coverage
32-bit targets with SSE2 will try to use the <2 x i64> vector CTPOP expansion for i64 CTPOP, so add an i686 +sse2 RUN line to cover that codegen.
Added:
Modified:
llvm/test/CodeGen/X86/ctpop-mask.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/ctpop-mask.ll b/llvm/test/CodeGen/X86/ctpop-mask.ll
index e0a96a9f98879..6d4fa4a4cdfdf 100644
--- a/llvm/test/CodeGen/X86/ctpop-mask.ll
+++ b/llvm/test/CodeGen/X86/ctpop-mask.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s -check-prefixes=X86-POPCOUNT
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s -check-prefixes=X64-POPCOUNT
-; RUN: llc < %s -mtriple=i686-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT
+; RUN: llc < %s -mtriple=i686-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT,X86-NO-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 -mattr=-popcnt | FileCheck %s -check-prefixes=X86-NO-POPCOUNT,X86-SSE2
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=-popcnt | FileCheck %s -check-prefixes=X64-NO-POPCOUNT
declare i8 @llvm.ctpop.i8(i8) nounwind readnone
@@ -28,17 +29,42 @@ define i64 @ctpop_mask2(i64 %x) nounwind readnone {
; X64-POPCOUNT-NEXT: popcntl %edi, %eax
; X64-POPCOUNT-NEXT: retq
;
-; X86-NO-POPCOUNT-LABEL: ctpop_mask2:
-; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT: andl $3, %eax
-; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
-; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
-; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
-; X86-NO-POPCOUNT-NEXT: retl
+; X86-NO-SSE2-LABEL: ctpop_mask2:
+; X86-NO-SSE2: # %bb.0:
+; X86-NO-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-SSE2-NEXT: andl $3, %eax
+; X86-NO-SSE2-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-SSE2-NEXT: shrl $3, %eax
+; X86-NO-SSE2-NEXT: andl $17895697, %eax # imm = 0x1111111
+; X86-NO-SSE2-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: shrl $28, %eax
+; X86-NO-SSE2-NEXT: xorl %edx, %edx
+; X86-NO-SSE2-NEXT: retl
+;
+; X86-SSE2-LABEL: ctpop_mask2:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: andl $3, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_mask2:
; X64-NO-POPCOUNT: # %bb.0:
@@ -192,17 +218,42 @@ define i64 @ctpop_mask4(i64 %x) nounwind readnone {
; X64-POPCOUNT-NEXT: popcntl %edi, %eax
; X64-POPCOUNT-NEXT: retq
;
-; X86-NO-POPCOUNT-LABEL: ctpop_mask4:
-; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT: andl $15, %eax
-; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT: andl $17895697, %eax # imm = 0x1111111
-; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
-; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
-; X86-NO-POPCOUNT-NEXT: retl
+; X86-NO-SSE2-LABEL: ctpop_mask4:
+; X86-NO-SSE2: # %bb.0:
+; X86-NO-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-SSE2-NEXT: andl $15, %eax
+; X86-NO-SSE2-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-SSE2-NEXT: shrl $3, %eax
+; X86-NO-SSE2-NEXT: andl $17895697, %eax # imm = 0x1111111
+; X86-NO-SSE2-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: shrl $28, %eax
+; X86-NO-SSE2-NEXT: xorl %edx, %edx
+; X86-NO-SSE2-NEXT: retl
+;
+; X86-SSE2-LABEL: ctpop_mask4:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: andl $15, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_mask4:
; X64-NO-POPCOUNT: # %bb.0:
@@ -274,17 +325,42 @@ define i64 @ctpop_mask5(i64 %x) nounwind readnone {
; X64-POPCOUNT-NEXT: popcntl %edi, %eax
; X64-POPCOUNT-NEXT: retq
;
-; X86-NO-POPCOUNT-LABEL: ctpop_mask5:
-; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT: andl $31, %eax
-; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
-; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
-; X86-NO-POPCOUNT-NEXT: retl
+; X86-NO-SSE2-LABEL: ctpop_mask5:
+; X86-NO-SSE2: # %bb.0:
+; X86-NO-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-SSE2-NEXT: andl $31, %eax
+; X86-NO-SSE2-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-SSE2-NEXT: shrl $3, %eax
+; X86-NO-SSE2-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: shrl $28, %eax
+; X86-NO-SSE2-NEXT: xorl %edx, %edx
+; X86-NO-SSE2-NEXT: retl
+;
+; X86-SSE2-LABEL: ctpop_mask5:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: andl $31, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_mask5:
; X64-NO-POPCOUNT: # %bb.0:
@@ -395,18 +471,43 @@ define i64 @ctpop_shifted_mask6(i64 %x) nounwind readnone {
; X64-POPCOUNT-NEXT: popcntl %edi, %eax
; X64-POPCOUNT-NEXT: retq
;
-; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
-; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT: shrl $9, %eax
-; X86-NO-POPCOUNT-NEXT: andl $51, %eax
-; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
-; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
-; X86-NO-POPCOUNT-NEXT: retl
+; X86-NO-SSE2-LABEL: ctpop_shifted_mask6:
+; X86-NO-SSE2: # %bb.0:
+; X86-NO-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NO-SSE2-NEXT: shrl $9, %eax
+; X86-NO-SSE2-NEXT: andl $51, %eax
+; X86-NO-SSE2-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-SSE2-NEXT: shrl $3, %eax
+; X86-NO-SSE2-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: shrl $28, %eax
+; X86-NO-SSE2-NEXT: xorl %edx, %edx
+; X86-NO-SSE2-NEXT: retl
+;
+; X86-SSE2-LABEL: ctpop_shifted_mask6:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl $26112, %eax # imm = 0x6600
+; X86-SSE2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask6:
; X64-NO-POPCOUNT: # %bb.0:
@@ -559,16 +660,41 @@ define i64 @ctpop_shifted_mask8(i64 %x) nounwind readnone {
; X64-POPCOUNT-NEXT: popcntl %edi, %eax
; X64-POPCOUNT-NEXT: retq
;
-; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
-; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NO-POPCOUNT-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
-; X86-NO-POPCOUNT-NEXT: shrl $3, %eax
-; X86-NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
-; X86-NO-POPCOUNT-NEXT: shrl $28, %eax
-; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
-; X86-NO-POPCOUNT-NEXT: retl
+; X86-NO-SSE2-LABEL: ctpop_shifted_mask8:
+; X86-NO-SSE2: # %bb.0:
+; X86-NO-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NO-SSE2-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NO-SSE2-NEXT: shrl $3, %eax
+; X86-NO-SSE2-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NO-SSE2-NEXT: shrl $28, %eax
+; X86-NO-SSE2-NEXT: xorl %edx, %edx
+; X86-NO-SSE2-NEXT: retl
+;
+; X86-SSE2-LABEL: ctpop_shifted_mask8:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: shll $8, %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask8:
; X64-NO-POPCOUNT: # %bb.0:
@@ -657,27 +783,53 @@ define i64 @ctpop_shifted_mask16(i64 %x) nounwind readnone {
; X64-POPCOUNT-NEXT: popcntq %rax, %rax
; X64-POPCOUNT-NEXT: retq
;
-; X86-NO-POPCOUNT-LABEL: ctpop_shifted_mask16:
-; X86-NO-POPCOUNT: # %bb.0:
-; X86-NO-POPCOUNT-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NO-POPCOUNT-NEXT: movl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: andl $524280, %eax # imm = 0x7FFF8
-; X86-NO-POPCOUNT-NEXT: shrl %ecx
-; X86-NO-POPCOUNT-NEXT: andl $87380, %ecx # imm = 0x15554
-; X86-NO-POPCOUNT-NEXT: subl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $858993456, %ecx # imm = 0x33333330
-; X86-NO-POPCOUNT-NEXT: shrl $2, %eax
-; X86-NO-POPCOUNT-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X86-NO-POPCOUNT-NEXT: addl %ecx, %eax
-; X86-NO-POPCOUNT-NEXT: movl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: shrl $4, %ecx
-; X86-NO-POPCOUNT-NEXT: addl %eax, %ecx
-; X86-NO-POPCOUNT-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NO-POPCOUNT-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X86-NO-POPCOUNT-NEXT: shrl $24, %eax
-; X86-NO-POPCOUNT-NEXT: xorl %edx, %edx
-; X86-NO-POPCOUNT-NEXT: retl
+; X86-NO-SSE2-LABEL: ctpop_shifted_mask16:
+; X86-NO-SSE2: # %bb.0:
+; X86-NO-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NO-SSE2-NEXT: movl %ecx, %eax
+; X86-NO-SSE2-NEXT: andl $524280, %eax # imm = 0x7FFF8
+; X86-NO-SSE2-NEXT: shrl %ecx
+; X86-NO-SSE2-NEXT: andl $87380, %ecx # imm = 0x15554
+; X86-NO-SSE2-NEXT: subl %ecx, %eax
+; X86-NO-SSE2-NEXT: movl %eax, %ecx
+; X86-NO-SSE2-NEXT: andl $858993456, %ecx # imm = 0x33333330
+; X86-NO-SSE2-NEXT: shrl $2, %eax
+; X86-NO-SSE2-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NO-SSE2-NEXT: addl %ecx, %eax
+; X86-NO-SSE2-NEXT: movl %eax, %ecx
+; X86-NO-SSE2-NEXT: shrl $4, %ecx
+; X86-NO-SSE2-NEXT: addl %eax, %ecx
+; X86-NO-SSE2-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NO-SSE2-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
+; X86-NO-SSE2-NEXT: shrl $24, %eax
+; X86-NO-SSE2-NEXT: xorl %edx, %edx
+; X86-NO-SSE2-NEXT: retl
+;
+; X86-SSE2-LABEL: ctpop_shifted_mask16:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl $524280, %eax # imm = 0x7FFF8
+; X86-SSE2-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movd %eax, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
; X64-NO-POPCOUNT-LABEL: ctpop_shifted_mask16:
; X64-NO-POPCOUNT: # %bb.0:
More information about the llvm-commits mailing list