[llvm] e0c06e3 - [X86] Regenerate popcnt tests. NFCI.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 27 10:01:44 PDT 2020
Author: Simon Pilgrim
Date: 2020-10-27T16:45:46Z
New Revision: e0c06e310c23ebf2bc20f7f4324aec2ec3d34861
URL: https://github.com/llvm/llvm-project/commit/e0c06e310c23ebf2bc20f7f4324aec2ec3d34861
DIFF: https://github.com/llvm/llvm-project/commit/e0c06e310c23ebf2bc20f7f4324aec2ec3d34861.diff
LOG: [X86] Regenerate popcnt tests. NFCI.
Merge FileCheck check prefixes where possible, and use 'X86' instead of 'X32' as the prefix for the i686 RUN lines (we try to reserve 'X32' for gnux32 triple tests).
Added:
Modified:
llvm/test/CodeGen/X86/popcnt.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index cc6f3153d2ca..3fe9871bae65 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -1,29 +1,29 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X32,X32-NOSSE
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X32-POPCNT
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
-; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X32,X32-SSE2
-; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X32,X32-SSSE3
+; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
+; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
define i8 @cnt8(i8 %x) nounwind readnone {
-; X32-LABEL: cnt8:
-; X32: # %bb.0:
-; X32-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrb %al
-; X32-NEXT: andb $85, %al
-; X32-NEXT: subb %al, %cl
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: andb $51, %al
-; X32-NEXT: shrb $2, %cl
-; X32-NEXT: andb $51, %cl
-; X32-NEXT: addb %al, %cl
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shrb $4, %al
-; X32-NEXT: addb %cl, %al
-; X32-NEXT: andb $15, %al
-; X32-NEXT: retl
+; X86-LABEL: cnt8:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb %al
+; X86-NEXT: andb $85, %al
+; X86-NEXT: subb %al, %cl
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andb $51, %al
+; X86-NEXT: shrb $2, %cl
+; X86-NEXT: andb $51, %cl
+; X86-NEXT: addb %al, %cl
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shrb $4, %al
+; X86-NEXT: addb %cl, %al
+; X86-NEXT: andb $15, %al
+; X86-NEXT: retl
;
; X64-LABEL: cnt8:
; X64: # %bb.0:
@@ -44,12 +44,12 @@ define i8 @cnt8(i8 %x) nounwind readnone {
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt8:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: popcntl %eax, %eax
-; X32-POPCNT-NEXT: # kill: def $al killed $al killed $eax
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt8:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: popcntl %eax, %eax
+; X86-POPCNT-NEXT: # kill: def $al killed $al killed $eax
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt8:
; X64-POPCNT: # %bb.0:
@@ -62,28 +62,28 @@ define i8 @cnt8(i8 %x) nounwind readnone {
}
define i16 @cnt16(i16 %x) nounwind readnone {
-; X32-LABEL: cnt16:
-; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl %ecx
-; X32-NEXT: andl $21845, %ecx # imm = 0x5555
-; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $13107, %ecx # imm = 0x3333
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: andl $13107, %eax # imm = 0x3333
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $4, %ecx
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: andl $3855, %ecx # imm = 0xF0F
-; X32-NEXT: movl %ecx, %eax
-; X32-NEXT: shll $8, %eax
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movzbl %ah, %eax
-; X32-NEXT: # kill: def $ax killed $ax killed $eax
-; X32-NEXT: retl
+; X86-LABEL: cnt16:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl %ecx
+; X86-NEXT: andl $21845, %ecx # imm = 0x5555
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andl $13107, %ecx # imm = 0x3333
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl $13107, %eax # imm = 0x3333
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl $4, %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: andl $3855, %ecx # imm = 0xF0F
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: shll $8, %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movzbl %ah, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
;
; X64-LABEL: cnt16:
; X64: # %bb.0:
@@ -107,10 +107,10 @@ define i16 @cnt16(i16 %x) nounwind readnone {
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt16:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt16:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntw {{[0-9]+}}(%esp), %ax
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt16:
; X64-POPCNT: # %bb.0:
@@ -121,25 +121,25 @@ define i16 @cnt16(i16 %x) nounwind readnone {
}
define i32 @cnt32(i32 %x) nounwind readnone {
-; X32-LABEL: cnt32:
-; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl %ecx
-; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $4, %ecx
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X32-NEXT: shrl $24, %eax
-; X32-NEXT: retl
+; X86-LABEL: cnt32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl %ecx
+; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl $4, %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
+; X86-NEXT: shrl $24, %eax
+; X86-NEXT: retl
;
; X64-LABEL: cnt32:
; X64: # %bb.0:
@@ -160,10 +160,10 @@ define i32 @cnt32(i32 %x) nounwind readnone {
; X64-NEXT: shrl $24, %eax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt32:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt32:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt32:
; X64-POPCNT: # %bb.0:
@@ -174,43 +174,43 @@ define i32 @cnt32(i32 %x) nounwind readnone {
}
define i64 @cnt64(i64 %x) nounwind readnone {
-; X32-NOSSE-LABEL: cnt64:
-; X32-NOSSE: # %bb.0:
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: shrl %edx
-; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edx, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %ecx
-; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edx, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: shrl $4, %edx
-; X32-NOSSE-NEXT: addl %ecx, %edx
-; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %ecx
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: shrl %edx
-; X32-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edx, %eax
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %eax
-; X32-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edx, %eax
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: shrl $4, %edx
-; X32-NOSSE-NEXT: addl %eax, %edx
-; X32-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: addl %ecx, %eax
-; X32-NOSSE-NEXT: xorl %edx, %edx
-; X32-NOSSE-NEXT: retl
+; X86-NOSSE-LABEL: cnt64:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %ecx
+; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %ecx, %edx
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ecx
+; X86-NOSSE-NEXT: movl %eax, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edx, %eax
+; X86-NOSSE-NEXT: movl %eax, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edx, %eax
+; X86-NOSSE-NEXT: movl %eax, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: addl %ecx, %eax
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: retl
;
; X64-LABEL: cnt64:
; X64: # %bb.0:
@@ -235,146 +235,146 @@ define i64 @cnt64(i64 %x) nounwind readnone {
; X64-NEXT: shrq $56, %rax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt64:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: addl %ecx, %eax
-; X32-POPCNT-NEXT: xorl %edx, %edx
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt64:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: addl %ecx, %eax
+; X86-POPCNT-NEXT: xorl %edx, %edx
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt64:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
;
-; X32-SSE2-LABEL: cnt64:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $1, %xmm1
-; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE2-NEXT: psubb %xmm1, %xmm0
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: pand %xmm1, %xmm2
-; X32-SSE2-NEXT: psrlw $2, %xmm0
-; X32-SSE2-NEXT: pand %xmm1, %xmm0
-; X32-SSE2-NEXT: paddb %xmm2, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $4, %xmm1
-; X32-SSE2-NEXT: paddb %xmm0, %xmm1
-; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE2-NEXT: pxor %xmm0, %xmm0
-; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
-; X32-SSE2-NEXT: movd %xmm0, %eax
-; X32-SSE2-NEXT: xorl %edx, %edx
-; X32-SSE2-NEXT: retl
+; X86-SSE2-LABEL: cnt64:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
-; X32-SSSE3-LABEL: cnt64:
-; X32-SSSE3: # %bb.0:
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
-; X32-SSSE3-NEXT: psrlw $4, %xmm1
-; X32-SSSE3-NEXT: pand %xmm0, %xmm1
-; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
-; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
-; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
-; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
-; X32-SSSE3-NEXT: movd %xmm0, %eax
-; X32-SSSE3-NEXT: xorl %edx, %edx
-; X32-SSSE3-NEXT: retl
+; X86-SSSE3-LABEL: cnt64:
+; X86-SSSE3: # %bb.0:
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
+; X86-SSSE3-NEXT: psrlw $4, %xmm1
+; X86-SSSE3-NEXT: pand %xmm0, %xmm1
+; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3
+; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
+; X86-SSSE3-NEXT: pxor %xmm0, %xmm0
+; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0
+; X86-SSSE3-NEXT: movd %xmm0, %eax
+; X86-SSSE3-NEXT: xorl %edx, %edx
+; X86-SSSE3-NEXT: retl
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
}
define i128 @cnt128(i128 %x) nounwind readnone {
-; X32-NOSSE-LABEL: cnt128:
-; X32-NOSSE: # %bb.0:
-; X32-NOSSE-NEXT: pushl %ebx
-; X32-NOSSE-NEXT: pushl %edi
-; X32-NOSSE-NEXT: pushl %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NOSSE-NEXT: movl %edi, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %ebx, %edi
-; X32-NOSSE-NEXT: movl %edi, %ebx
-; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %edi
-; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %ebx, %edi
-; X32-NOSSE-NEXT: movl %edi, %ebx
-; X32-NOSSE-NEXT: shrl $4, %ebx
-; X32-NOSSE-NEXT: addl %edi, %ebx
-; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edi
-; X32-NOSSE-NEXT: movl %esi, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %ebx, %esi
-; X32-NOSSE-NEXT: movl %esi, %ebx
-; X32-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %esi
-; X32-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %ebx, %esi
-; X32-NOSSE-NEXT: movl %esi, %ebx
-; X32-NOSSE-NEXT: shrl $4, %ebx
-; X32-NOSSE-NEXT: addl %esi, %ebx
-; X32-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %esi
-; X32-NOSSE-NEXT: addl %edi, %esi
-; X32-NOSSE-NEXT: movl %edx, %edi
-; X32-NOSSE-NEXT: shrl %edi
-; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edi, %edx
-; X32-NOSSE-NEXT: movl %edx, %edi
-; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %edx
-; X32-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edi, %edx
-; X32-NOSSE-NEXT: movl %edx, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %edx, %edi
-; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: shrl %edi
-; X32-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %edi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X32-NOSSE-NEXT: shrl $2, %ecx
-; X32-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X32-NOSSE-NEXT: addl %edi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %ecx, %edi
-; X32-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %ecx
-; X32-NOSSE-NEXT: addl %edx, %ecx
-; X32-NOSSE-NEXT: addl %esi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, (%eax)
-; X32-NOSSE-NEXT: movl $0, 12(%eax)
-; X32-NOSSE-NEXT: movl $0, 8(%eax)
-; X32-NOSSE-NEXT: movl $0, 4(%eax)
-; X32-NOSSE-NEXT: popl %esi
-; X32-NOSSE-NEXT: popl %edi
-; X32-NOSSE-NEXT: popl %ebx
-; X32-NOSSE-NEXT: retl $4
+; X86-NOSSE-LABEL: cnt128:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %edi
+; X86-NOSSE-NEXT: pushl %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NOSSE-NEXT: movl %edi, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %ebx, %edi
+; X86-NOSSE-NEXT: movl %edi, %ebx
+; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %edi
+; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %ebx, %edi
+; X86-NOSSE-NEXT: movl %edi, %ebx
+; X86-NOSSE-NEXT: shrl $4, %ebx
+; X86-NOSSE-NEXT: addl %edi, %ebx
+; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %ebx, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl $1431655765, %ebx # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %ebx, %esi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: andl $858993459, %ebx # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl $858993459, %esi # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %ebx, %esi
+; X86-NOSSE-NEXT: movl %esi, %ebx
+; X86-NOSSE-NEXT: shrl $4, %ebx
+; X86-NOSSE-NEXT: addl %esi, %ebx
+; X86-NOSSE-NEXT: andl $252645135, %ebx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %ebx, %esi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %esi
+; X86-NOSSE-NEXT: addl %edi, %esi
+; X86-NOSSE-NEXT: movl %edx, %edi
+; X86-NOSSE-NEXT: shrl %edi
+; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edi, %edx
+; X86-NOSSE-NEXT: movl %edx, %edi
+; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %edx
+; X86-NOSSE-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edi, %edx
+; X86-NOSSE-NEXT: movl %edx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %edx, %edi
+; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edx
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: shrl %edi
+; X86-NOSSE-NEXT: andl $1431655765, %edi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %edi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X86-NOSSE-NEXT: shrl $2, %ecx
+; X86-NOSSE-NEXT: andl $858993459, %ecx # imm = 0x33333333
+; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: imull $16843009, %edi, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ecx
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: addl %esi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: movl $0, 12(%eax)
+; X86-NOSSE-NEXT: movl $0, 8(%eax)
+; X86-NOSSE-NEXT: movl $0, 4(%eax)
+; X86-NOSSE-NEXT: popl %esi
+; X86-NOSSE-NEXT: popl %edi
+; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: retl $4
;
; X64-LABEL: cnt128:
; X64: # %bb.0:
@@ -416,23 +416,23 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt128:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: pushl %esi
-; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
-; X32-POPCNT-NEXT: addl %ecx, %edx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
-; X32-POPCNT-NEXT: addl %ecx, %esi
-; X32-POPCNT-NEXT: addl %edx, %esi
-; X32-POPCNT-NEXT: movl %esi, (%eax)
-; X32-POPCNT-NEXT: movl $0, 12(%eax)
-; X32-POPCNT-NEXT: movl $0, 8(%eax)
-; X32-POPCNT-NEXT: movl $0, 4(%eax)
-; X32-POPCNT-NEXT: popl %esi
-; X32-POPCNT-NEXT: retl $4
+; X86-POPCNT-LABEL: cnt128:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %esi
+; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: addl %ecx, %edx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: addl %ecx, %esi
+; X86-POPCNT-NEXT: addl %edx, %esi
+; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: movl $0, 12(%eax)
+; X86-POPCNT-NEXT: movl $0, 8(%eax)
+; X86-POPCNT-NEXT: movl $0, 4(%eax)
+; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128:
; X64-POPCNT: # %bb.0:
@@ -442,129 +442,129 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X64-POPCNT-NEXT: xorl %edx, %edx
; X64-POPCNT-NEXT: retq
;
-; X32-SSE2-LABEL: cnt128:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $1, %xmm1
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; X32-SSE2-NEXT: pand %xmm2, %xmm1
-; X32-SSE2-NEXT: psubb %xmm1, %xmm0
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: pand %xmm1, %xmm3
-; X32-SSE2-NEXT: psrlw $2, %xmm0
-; X32-SSE2-NEXT: pand %xmm1, %xmm0
-; X32-SSE2-NEXT: paddb %xmm3, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: psrlw $4, %xmm3
-; X32-SSE2-NEXT: paddb %xmm0, %xmm3
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSE2-NEXT: pand %xmm0, %xmm3
-; X32-SSE2-NEXT: pxor %xmm4, %xmm4
-; X32-SSE2-NEXT: psadbw %xmm4, %xmm3
-; X32-SSE2-NEXT: movd %xmm3, %ecx
-; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm5
-; X32-SSE2-NEXT: psrlw $1, %xmm5
-; X32-SSE2-NEXT: pand %xmm2, %xmm5
-; X32-SSE2-NEXT: psubb %xmm5, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
-; X32-SSE2-NEXT: pand %xmm1, %xmm2
-; X32-SSE2-NEXT: psrlw $2, %xmm3
-; X32-SSE2-NEXT: pand %xmm1, %xmm3
-; X32-SSE2-NEXT: paddb %xmm2, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm1
-; X32-SSE2-NEXT: psrlw $4, %xmm1
-; X32-SSE2-NEXT: paddb %xmm3, %xmm1
-; X32-SSE2-NEXT: pand %xmm0, %xmm1
-; X32-SSE2-NEXT: psadbw %xmm4, %xmm1
-; X32-SSE2-NEXT: movd %xmm1, %edx
-; X32-SSE2-NEXT: addl %ecx, %edx
-; X32-SSE2-NEXT: movl %edx, (%eax)
-; X32-SSE2-NEXT: movl $0, 12(%eax)
-; X32-SSE2-NEXT: movl $0, 8(%eax)
-; X32-SSE2-NEXT: movl $0, 4(%eax)
-; X32-SSE2-NEXT: retl $4
+; X86-SSE2-LABEL: cnt128:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; X86-SSE2-NEXT: pand %xmm2, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm3, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: psrlw $4, %xmm3
+; X86-SSE2-NEXT: paddb %xmm0, %xmm3
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSE2-NEXT: pand %xmm0, %xmm3
+; X86-SSE2-NEXT: pxor %xmm4, %xmm4
+; X86-SSE2-NEXT: psadbw %xmm4, %xmm3
+; X86-SSE2-NEXT: movd %xmm3, %ecx
+; X86-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
+; X86-SSE2-NEXT: psrlw $1, %xmm5
+; X86-SSE2-NEXT: pand %xmm2, %xmm5
+; X86-SSE2-NEXT: psubb %xmm5, %xmm3
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm3
+; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: paddb %xmm2, %xmm3
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm3, %xmm1
+; X86-SSE2-NEXT: pand %xmm0, %xmm1
+; X86-SSE2-NEXT: psadbw %xmm4, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %edx
+; X86-SSE2-NEXT: addl %ecx, %edx
+; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: movl $0, 12(%eax)
+; X86-SSE2-NEXT: movl $0, 8(%eax)
+; X86-SSE2-NEXT: movl $0, 4(%eax)
+; X86-SSE2-NEXT: retl $4
;
-; X32-SSSE3-LABEL: cnt128:
-; X32-SSSE3: # %bb.0:
-; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
-; X32-SSSE3-NEXT: psrlw $4, %xmm1
-; X32-SSSE3-NEXT: pand %xmm0, %xmm1
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2
-; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2
-; X32-SSSE3-NEXT: paddb %xmm4, %xmm2
-; X32-SSSE3-NEXT: pxor %xmm1, %xmm1
-; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2
-; X32-SSSE3-NEXT: movd %xmm2, %ecx
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; X32-SSSE3-NEXT: pand %xmm0, %xmm4
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5
-; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5
-; X32-SSSE3-NEXT: psrlw $4, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3
-; X32-SSSE3-NEXT: paddb %xmm5, %xmm3
-; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3
-; X32-SSSE3-NEXT: movd %xmm3, %edx
-; X32-SSSE3-NEXT: addl %ecx, %edx
-; X32-SSSE3-NEXT: movl %edx, (%eax)
-; X32-SSSE3-NEXT: movl $0, 12(%eax)
-; X32-SSSE3-NEXT: movl $0, 8(%eax)
-; X32-SSSE3-NEXT: movl $0, 4(%eax)
-; X32-SSSE3-NEXT: retl $4
+; X86-SSSE3-LABEL: cnt128:
+; X86-SSSE3: # %bb.0:
+; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
+; X86-SSSE3-NEXT: psrlw $4, %xmm1
+; X86-SSSE3-NEXT: pand %xmm0, %xmm1
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm2
+; X86-SSSE3-NEXT: pshufb %xmm1, %xmm2
+; X86-SSSE3-NEXT: paddb %xmm4, %xmm2
+; X86-SSSE3-NEXT: pxor %xmm1, %xmm1
+; X86-SSSE3-NEXT: psadbw %xmm1, %xmm2
+; X86-SSSE3-NEXT: movd %xmm2, %ecx
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm2, %xmm4
+; X86-SSSE3-NEXT: pand %xmm0, %xmm4
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm5
+; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5
+; X86-SSSE3-NEXT: psrlw $4, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3
+; X86-SSSE3-NEXT: paddb %xmm5, %xmm3
+; X86-SSSE3-NEXT: psadbw %xmm1, %xmm3
+; X86-SSSE3-NEXT: movd %xmm3, %edx
+; X86-SSSE3-NEXT: addl %ecx, %edx
+; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: movl $0, 12(%eax)
+; X86-SSSE3-NEXT: movl $0, 8(%eax)
+; X86-SSSE3-NEXT: movl $0, 4(%eax)
+; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
}
define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
-; X32-LABEL: cnt64_noimplicitfloat:
-; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrl %edx
-; X32-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; X32-NEXT: subl %edx, %ecx
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X32-NEXT: shrl $2, %ecx
-; X32-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; X32-NEXT: addl %edx, %ecx
-; X32-NEXT: movl %ecx, %edx
-; X32-NEXT: shrl $4, %edx
-; X32-NEXT: addl %ecx, %edx
-; X32-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; X32-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
-; X32-NEXT: shrl $24, %ecx
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: shrl %edx
-; X32-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; X32-NEXT: subl %edx, %eax
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: andl $858993459, %edx # imm = 0x33333333
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: shrl $4, %edx
-; X32-NEXT: addl %eax, %edx
-; X32-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; X32-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
-; X32-NEXT: shrl $24, %eax
-; X32-NEXT: addl %ecx, %eax
-; X32-NEXT: xorl %edx, %edx
-; X32-NEXT: retl
+; X86-LABEL: cnt64_noimplicitfloat:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT: subl %edx, %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT: shrl $2, %ecx
+; X86-NEXT: andl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrl $4, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT: imull $16843009, %edx, %ecx # imm = 0x1010101
+; X86-NEXT: shrl $24, %ecx
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $1431655765, %edx # imm = 0x55555555
+; X86-NEXT: subl %edx, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl $858993459, %edx # imm = 0x33333333
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: shrl $4, %edx
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NEXT: shrl $24, %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: retl
;
; X64-LABEL: cnt64_noimplicitfloat:
; X64: # %bb.0:
@@ -589,13 +589,13 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
; X64-NEXT: shrq $56, %rax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt64_noimplicitfloat:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: addl %ecx, %eax
-; X32-POPCNT-NEXT: xorl %edx, %edx
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt64_noimplicitfloat:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: addl %ecx, %eax
+; X86-POPCNT-NEXT: xorl %edx, %edx
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt64_noimplicitfloat:
; X64-POPCNT: # %bb.0:
@@ -606,26 +606,26 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
}
define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
-; X32-LABEL: cnt32_optsize:
-; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl %ecx
-; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: andl %ecx, %edx
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: andl %ecx, %eax
-; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $4, %ecx
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X32-NEXT: shrl $24, %eax
-; X32-NEXT: retl
+; X86-LABEL: cnt32_optsize:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl %ecx
+; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl $4, %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
+; X86-NEXT: shrl $24, %eax
+; X86-NEXT: retl
;
; X64-LABEL: cnt32_optsize:
; X64: # %bb.0:
@@ -647,10 +647,10 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
; X64-NEXT: shrl $24, %eax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt32_optsize:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt32_optsize:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt32_optsize:
; X64-POPCNT: # %bb.0:
@@ -661,52 +661,52 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
}
define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
-; X32-NOSSE-LABEL: cnt64_optsize:
-; X32-NOSSE: # %bb.0:
-; X32-NOSSE-NEXT: pushl %ebx
-; X32-NOSSE-NEXT: pushl %edi
-; X32-NOSSE-NEXT: pushl %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: shrl %edx
-; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
-; X32-NOSSE-NEXT: andl %esi, %edx
-; X32-NOSSE-NEXT: subl %edx, %ecx
-; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: andl %edx, %edi
-; X32-NOSSE-NEXT: shrl $2, %ecx
-; X32-NOSSE-NEXT: andl %edx, %ecx
-; X32-NOSSE-NEXT: addl %edi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %ecx, %edi
-; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: andl %ecx, %edi
-; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edi
-; X32-NOSSE-NEXT: movl %eax, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl %esi, %ebx
-; X32-NOSSE-NEXT: subl %ebx, %eax
-; X32-NOSSE-NEXT: movl %eax, %esi
-; X32-NOSSE-NEXT: andl %edx, %esi
-; X32-NOSSE-NEXT: shrl $2, %eax
-; X32-NOSSE-NEXT: andl %edx, %eax
-; X32-NOSSE-NEXT: addl %esi, %eax
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: shrl $4, %edx
-; X32-NOSSE-NEXT: addl %eax, %edx
-; X32-NOSSE-NEXT: andl %ecx, %edx
-; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: addl %edi, %eax
-; X32-NOSSE-NEXT: xorl %edx, %edx
-; X32-NOSSE-NEXT: popl %esi
-; X32-NOSSE-NEXT: popl %edi
-; X32-NOSSE-NEXT: popl %ebx
-; X32-NOSSE-NEXT: retl
+; X86-NOSSE-LABEL: cnt64_optsize:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %edi
+; X86-NOSSE-NEXT: pushl %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %esi, %edx
+; X86-NOSSE-NEXT: subl %edx, %ecx
+; X86-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: andl %edx, %edi
+; X86-NOSSE-NEXT: shrl $2, %ecx
+; X86-NOSSE-NEXT: andl %edx, %ecx
+; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %ecx, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl %esi, %ebx
+; X86-NOSSE-NEXT: subl %ebx, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: andl %edx, %esi
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl %edx, %eax
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: andl %ecx, %edx
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: addl %edi, %eax
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: popl %esi
+; X86-NOSSE-NEXT: popl %edi
+; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: retl
;
; X64-LABEL: cnt64_optsize:
; X64: # %bb.0:
@@ -731,154 +731,154 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X64-NEXT: shrq $56, %rax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt64_optsize:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: addl %ecx, %eax
-; X32-POPCNT-NEXT: xorl %edx, %edx
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt64_optsize:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: addl %ecx, %eax
+; X86-POPCNT-NEXT: xorl %edx, %edx
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt64_optsize:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
;
-; X32-SSE2-LABEL: cnt64_optsize:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $1, %xmm1
-; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE2-NEXT: psubb %xmm1, %xmm0
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: pand %xmm1, %xmm2
-; X32-SSE2-NEXT: psrlw $2, %xmm0
-; X32-SSE2-NEXT: pand %xmm1, %xmm0
-; X32-SSE2-NEXT: paddb %xmm2, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $4, %xmm1
-; X32-SSE2-NEXT: paddb %xmm0, %xmm1
-; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE2-NEXT: pxor %xmm0, %xmm0
-; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
-; X32-SSE2-NEXT: movd %xmm0, %eax
-; X32-SSE2-NEXT: xorl %edx, %edx
-; X32-SSE2-NEXT: retl
+; X86-SSE2-LABEL: cnt64_optsize:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
-; X32-SSSE3-LABEL: cnt64_optsize:
-; X32-SSSE3: # %bb.0:
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
-; X32-SSSE3-NEXT: psrlw $4, %xmm1
-; X32-SSSE3-NEXT: pand %xmm0, %xmm1
-; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
-; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
-; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
-; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
-; X32-SSSE3-NEXT: movd %xmm0, %eax
-; X32-SSSE3-NEXT: xorl %edx, %edx
-; X32-SSSE3-NEXT: retl
+; X86-SSSE3-LABEL: cnt64_optsize:
+; X86-SSSE3: # %bb.0:
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
+; X86-SSSE3-NEXT: psrlw $4, %xmm1
+; X86-SSSE3-NEXT: pand %xmm0, %xmm1
+; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3
+; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
+; X86-SSSE3-NEXT: pxor %xmm0, %xmm0
+; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0
+; X86-SSSE3-NEXT: movd %xmm0, %eax
+; X86-SSSE3-NEXT: xorl %edx, %edx
+; X86-SSSE3-NEXT: retl
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
}
define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
-; X32-NOSSE-LABEL: cnt128_optsize:
-; X32-NOSSE: # %bb.0:
-; X32-NOSSE-NEXT: pushl %ebp
-; X32-NOSSE-NEXT: pushl %ebx
-; X32-NOSSE-NEXT: pushl %edi
-; X32-NOSSE-NEXT: pushl %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NOSSE-NEXT: movl %ebx, %ecx
-; X32-NOSSE-NEXT: shrl %ecx
-; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: andl %edi, %ecx
-; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %ecx, %ebx
-; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X32-NOSSE-NEXT: movl %ebx, %ebp
-; X32-NOSSE-NEXT: andl %ecx, %ebp
-; X32-NOSSE-NEXT: shrl $2, %ebx
-; X32-NOSSE-NEXT: andl %ecx, %ebx
-; X32-NOSSE-NEXT: addl %ebp, %ebx
-; X32-NOSSE-NEXT: movl %ebx, %ebp
-; X32-NOSSE-NEXT: shrl $4, %ebp
-; X32-NOSSE-NEXT: addl %ebx, %ebp
-; X32-NOSSE-NEXT: movl %eax, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl %edi, %ebx
-; X32-NOSSE-NEXT: subl %ebx, %eax
-; X32-NOSSE-NEXT: movl %eax, %ebx
-; X32-NOSSE-NEXT: andl %ecx, %ebx
-; X32-NOSSE-NEXT: shrl $2, %eax
-; X32-NOSSE-NEXT: andl %ecx, %eax
-; X32-NOSSE-NEXT: addl %ebx, %eax
-; X32-NOSSE-NEXT: movl %eax, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %eax, %edi
-; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: andl %ebx, %ebp
-; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: andl %ebx, %edi
-; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edi
-; X32-NOSSE-NEXT: addl %eax, %edi
-; X32-NOSSE-NEXT: movl %esi, %eax
-; X32-NOSSE-NEXT: shrl %eax
-; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
-; X32-NOSSE-NEXT: andl %ebp, %eax
-; X32-NOSSE-NEXT: subl %eax, %esi
-; X32-NOSSE-NEXT: movl %esi, %eax
-; X32-NOSSE-NEXT: andl %ecx, %eax
-; X32-NOSSE-NEXT: shrl $2, %esi
-; X32-NOSSE-NEXT: andl %ecx, %esi
-; X32-NOSSE-NEXT: addl %eax, %esi
-; X32-NOSSE-NEXT: movl %esi, %eax
-; X32-NOSSE-NEXT: shrl $4, %eax
-; X32-NOSSE-NEXT: addl %esi, %eax
-; X32-NOSSE-NEXT: movl %edx, %esi
-; X32-NOSSE-NEXT: shrl %esi
-; X32-NOSSE-NEXT: andl %ebp, %esi
-; X32-NOSSE-NEXT: subl %esi, %edx
-; X32-NOSSE-NEXT: movl %edx, %esi
-; X32-NOSSE-NEXT: andl %ecx, %esi
-; X32-NOSSE-NEXT: shrl $2, %edx
-; X32-NOSSE-NEXT: andl %ecx, %edx
-; X32-NOSSE-NEXT: addl %esi, %edx
-; X32-NOSSE-NEXT: movl %edx, %ecx
-; X32-NOSSE-NEXT: shrl $4, %ecx
-; X32-NOSSE-NEXT: addl %edx, %ecx
-; X32-NOSSE-NEXT: andl %ebx, %eax
-; X32-NOSSE-NEXT: andl %ebx, %ecx
-; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %ecx
-; X32-NOSSE-NEXT: addl %eax, %ecx
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: addl %edi, %ecx
-; X32-NOSSE-NEXT: xorl %edx, %edx
-; X32-NOSSE-NEXT: movl %edx, 12(%eax)
-; X32-NOSSE-NEXT: movl %edx, 8(%eax)
-; X32-NOSSE-NEXT: movl %edx, 4(%eax)
-; X32-NOSSE-NEXT: movl %ecx, (%eax)
-; X32-NOSSE-NEXT: popl %esi
-; X32-NOSSE-NEXT: popl %edi
-; X32-NOSSE-NEXT: popl %ebx
-; X32-NOSSE-NEXT: popl %ebp
-; X32-NOSSE-NEXT: retl $4
+; X86-NOSSE-LABEL: cnt128_optsize:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %edi
+; X86-NOSSE-NEXT: pushl %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ecx
+; X86-NOSSE-NEXT: shrl %ecx
+; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %edi, %ecx
+; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %ecx, %ebx
+; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X86-NOSSE-NEXT: movl %ebx, %ebp
+; X86-NOSSE-NEXT: andl %ecx, %ebp
+; X86-NOSSE-NEXT: shrl $2, %ebx
+; X86-NOSSE-NEXT: andl %ecx, %ebx
+; X86-NOSSE-NEXT: addl %ebp, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ebp
+; X86-NOSSE-NEXT: shrl $4, %ebp
+; X86-NOSSE-NEXT: addl %ebx, %ebp
+; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl %edi, %ebx
+; X86-NOSSE-NEXT: subl %ebx, %eax
+; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: andl %ecx, %ebx
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: addl %ebx, %eax
+; X86-NOSSE-NEXT: movl %eax, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %eax, %edi
+; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %ebx, %ebp
+; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: andl %ebx, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: addl %eax, %edi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: shrl %eax
+; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %ebp, %eax
+; X86-NOSSE-NEXT: subl %eax, %esi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: addl %eax, %esi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: shrl $4, %eax
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl %ebp, %esi
+; X86-NOSSE-NEXT: subl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: shrl $2, %edx
+; X86-NOSSE-NEXT: andl %ecx, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %ecx
+; X86-NOSSE-NEXT: shrl $4, %ecx
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: andl %ebx, %eax
+; X86-NOSSE-NEXT: andl %ebx, %ecx
+; X86-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ecx
+; X86-NOSSE-NEXT: addl %eax, %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: movl %edx, 12(%eax)
+; X86-NOSSE-NEXT: movl %edx, 8(%eax)
+; X86-NOSSE-NEXT: movl %edx, 4(%eax)
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: popl %esi
+; X86-NOSSE-NEXT: popl %edi
+; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl $4
;
; X64-LABEL: cnt128_optsize:
; X64: # %bb.0:
@@ -920,24 +920,24 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt128_optsize:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: pushl %esi
-; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
-; X32-POPCNT-NEXT: addl %ecx, %edx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
-; X32-POPCNT-NEXT: addl %ecx, %esi
-; X32-POPCNT-NEXT: addl %edx, %esi
-; X32-POPCNT-NEXT: xorl %ecx, %ecx
-; X32-POPCNT-NEXT: movl %ecx, 12(%eax)
-; X32-POPCNT-NEXT: movl %ecx, 8(%eax)
-; X32-POPCNT-NEXT: movl %ecx, 4(%eax)
-; X32-POPCNT-NEXT: movl %esi, (%eax)
-; X32-POPCNT-NEXT: popl %esi
-; X32-POPCNT-NEXT: retl $4
+; X86-POPCNT-LABEL: cnt128_optsize:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %esi
+; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: addl %ecx, %edx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: addl %ecx, %esi
+; X86-POPCNT-NEXT: addl %edx, %esi
+; X86-POPCNT-NEXT: xorl %ecx, %ecx
+; X86-POPCNT-NEXT: movl %ecx, 12(%eax)
+; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
+; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
+; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_optsize:
; X64-POPCNT: # %bb.0:
@@ -947,114 +947,114 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X64-POPCNT-NEXT: xorl %edx, %edx
; X64-POPCNT-NEXT: retq
;
-; X32-SSE2-LABEL: cnt128_optsize:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $1, %xmm1
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; X32-SSE2-NEXT: pand %xmm2, %xmm1
-; X32-SSE2-NEXT: psubb %xmm1, %xmm0
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: pand %xmm1, %xmm3
-; X32-SSE2-NEXT: psrlw $2, %xmm0
-; X32-SSE2-NEXT: pand %xmm1, %xmm0
-; X32-SSE2-NEXT: paddb %xmm3, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: psrlw $4, %xmm3
-; X32-SSE2-NEXT: paddb %xmm0, %xmm3
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSE2-NEXT: pand %xmm0, %xmm3
-; X32-SSE2-NEXT: pxor %xmm4, %xmm4
-; X32-SSE2-NEXT: psadbw %xmm4, %xmm3
-; X32-SSE2-NEXT: movd %xmm3, %ecx
-; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm5
-; X32-SSE2-NEXT: psrlw $1, %xmm5
-; X32-SSE2-NEXT: pand %xmm2, %xmm5
-; X32-SSE2-NEXT: psubb %xmm5, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
-; X32-SSE2-NEXT: pand %xmm1, %xmm2
-; X32-SSE2-NEXT: psrlw $2, %xmm3
-; X32-SSE2-NEXT: pand %xmm1, %xmm3
-; X32-SSE2-NEXT: paddb %xmm2, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm1
-; X32-SSE2-NEXT: psrlw $4, %xmm1
-; X32-SSE2-NEXT: paddb %xmm3, %xmm1
-; X32-SSE2-NEXT: pand %xmm0, %xmm1
-; X32-SSE2-NEXT: psadbw %xmm4, %xmm1
-; X32-SSE2-NEXT: movd %xmm1, %edx
-; X32-SSE2-NEXT: addl %ecx, %edx
-; X32-SSE2-NEXT: xorl %ecx, %ecx
-; X32-SSE2-NEXT: movl %ecx, 12(%eax)
-; X32-SSE2-NEXT: movl %ecx, 8(%eax)
-; X32-SSE2-NEXT: movl %ecx, 4(%eax)
-; X32-SSE2-NEXT: movl %edx, (%eax)
-; X32-SSE2-NEXT: retl $4
+; X86-SSE2-LABEL: cnt128_optsize:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; X86-SSE2-NEXT: pand %xmm2, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm3, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: psrlw $4, %xmm3
+; X86-SSE2-NEXT: paddb %xmm0, %xmm3
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSE2-NEXT: pand %xmm0, %xmm3
+; X86-SSE2-NEXT: pxor %xmm4, %xmm4
+; X86-SSE2-NEXT: psadbw %xmm4, %xmm3
+; X86-SSE2-NEXT: movd %xmm3, %ecx
+; X86-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
+; X86-SSE2-NEXT: psrlw $1, %xmm5
+; X86-SSE2-NEXT: pand %xmm2, %xmm5
+; X86-SSE2-NEXT: psubb %xmm5, %xmm3
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm3
+; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: paddb %xmm2, %xmm3
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm3, %xmm1
+; X86-SSE2-NEXT: pand %xmm0, %xmm1
+; X86-SSE2-NEXT: psadbw %xmm4, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %edx
+; X86-SSE2-NEXT: addl %ecx, %edx
+; X86-SSE2-NEXT: xorl %ecx, %ecx
+; X86-SSE2-NEXT: movl %ecx, 12(%eax)
+; X86-SSE2-NEXT: movl %ecx, 8(%eax)
+; X86-SSE2-NEXT: movl %ecx, 4(%eax)
+; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: retl $4
;
-; X32-SSSE3-LABEL: cnt128_optsize:
-; X32-SSSE3: # %bb.0:
-; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
-; X32-SSSE3-NEXT: psrlw $4, %xmm1
-; X32-SSSE3-NEXT: pand %xmm0, %xmm1
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2
-; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2
-; X32-SSSE3-NEXT: paddb %xmm4, %xmm2
-; X32-SSSE3-NEXT: pxor %xmm1, %xmm1
-; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2
-; X32-SSSE3-NEXT: movd %xmm2, %ecx
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; X32-SSSE3-NEXT: pand %xmm0, %xmm4
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5
-; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5
-; X32-SSSE3-NEXT: psrlw $4, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3
-; X32-SSSE3-NEXT: paddb %xmm5, %xmm3
-; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3
-; X32-SSSE3-NEXT: movd %xmm3, %edx
-; X32-SSSE3-NEXT: addl %ecx, %edx
-; X32-SSSE3-NEXT: xorl %ecx, %ecx
-; X32-SSSE3-NEXT: movl %ecx, 12(%eax)
-; X32-SSSE3-NEXT: movl %ecx, 8(%eax)
-; X32-SSSE3-NEXT: movl %ecx, 4(%eax)
-; X32-SSSE3-NEXT: movl %edx, (%eax)
-; X32-SSSE3-NEXT: retl $4
+; X86-SSSE3-LABEL: cnt128_optsize:
+; X86-SSSE3: # %bb.0:
+; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
+; X86-SSSE3-NEXT: psrlw $4, %xmm1
+; X86-SSSE3-NEXT: pand %xmm0, %xmm1
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm2
+; X86-SSSE3-NEXT: pshufb %xmm1, %xmm2
+; X86-SSSE3-NEXT: paddb %xmm4, %xmm2
+; X86-SSSE3-NEXT: pxor %xmm1, %xmm1
+; X86-SSSE3-NEXT: psadbw %xmm1, %xmm2
+; X86-SSSE3-NEXT: movd %xmm2, %ecx
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm2, %xmm4
+; X86-SSSE3-NEXT: pand %xmm0, %xmm4
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm5
+; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5
+; X86-SSSE3-NEXT: psrlw $4, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3
+; X86-SSSE3-NEXT: paddb %xmm5, %xmm3
+; X86-SSSE3-NEXT: psadbw %xmm1, %xmm3
+; X86-SSSE3-NEXT: movd %xmm3, %edx
+; X86-SSSE3-NEXT: addl %ecx, %edx
+; X86-SSSE3-NEXT: xorl %ecx, %ecx
+; X86-SSSE3-NEXT: movl %ecx, 12(%eax)
+; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
+; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
+; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
}
define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
-; X32-LABEL: cnt32_pgso:
-; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl %ecx
-; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; X32-NEXT: subl %ecx, %eax
-; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X32-NEXT: movl %eax, %edx
-; X32-NEXT: andl %ecx, %edx
-; X32-NEXT: shrl $2, %eax
-; X32-NEXT: andl %ecx, %eax
-; X32-NEXT: addl %edx, %eax
-; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: shrl $4, %ecx
-; X32-NEXT: addl %eax, %ecx
-; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
-; X32-NEXT: shrl $24, %eax
-; X32-NEXT: retl
+; X86-LABEL: cnt32_pgso:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl %ecx
+; X86-NEXT: andl $1431655765, %ecx # imm = 0x55555555
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: andl %ecx, %edx
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl %ecx, %eax
+; X86-NEXT: addl %edx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: shrl $4, %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101
+; X86-NEXT: shrl $24, %eax
+; X86-NEXT: retl
;
; X64-LABEL: cnt32_pgso:
; X64: # %bb.0:
@@ -1076,10 +1076,10 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
; X64-NEXT: shrl $24, %eax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt32_pgso:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt32_pgso:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt32_pgso:
; X64-POPCNT: # %bb.0:
@@ -1090,52 +1090,52 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
}
define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
-; X32-NOSSE-LABEL: cnt64_pgso:
-; X32-NOSSE: # %bb.0:
-; X32-NOSSE-NEXT: pushl %ebx
-; X32-NOSSE-NEXT: pushl %edi
-; X32-NOSSE-NEXT: pushl %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edx
-; X32-NOSSE-NEXT: shrl %edx
-; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
-; X32-NOSSE-NEXT: andl %esi, %edx
-; X32-NOSSE-NEXT: subl %edx, %ecx
-; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: andl %edx, %edi
-; X32-NOSSE-NEXT: shrl $2, %ecx
-; X32-NOSSE-NEXT: andl %edx, %ecx
-; X32-NOSSE-NEXT: addl %edi, %ecx
-; X32-NOSSE-NEXT: movl %ecx, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %ecx, %edi
-; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: andl %ecx, %edi
-; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edi
-; X32-NOSSE-NEXT: movl %eax, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl %esi, %ebx
-; X32-NOSSE-NEXT: subl %ebx, %eax
-; X32-NOSSE-NEXT: movl %eax, %esi
-; X32-NOSSE-NEXT: andl %edx, %esi
-; X32-NOSSE-NEXT: shrl $2, %eax
-; X32-NOSSE-NEXT: andl %edx, %eax
-; X32-NOSSE-NEXT: addl %esi, %eax
-; X32-NOSSE-NEXT: movl %eax, %edx
-; X32-NOSSE-NEXT: shrl $4, %edx
-; X32-NOSSE-NEXT: addl %eax, %edx
-; X32-NOSSE-NEXT: andl %ecx, %edx
-; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: addl %edi, %eax
-; X32-NOSSE-NEXT: xorl %edx, %edx
-; X32-NOSSE-NEXT: popl %esi
-; X32-NOSSE-NEXT: popl %edi
-; X32-NOSSE-NEXT: popl %ebx
-; X32-NOSSE-NEXT: retl
+; X86-NOSSE-LABEL: cnt64_pgso:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %edi
+; X86-NOSSE-NEXT: pushl %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edx
+; X86-NOSSE-NEXT: shrl %edx
+; X86-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %esi, %edx
+; X86-NOSSE-NEXT: subl %edx, %ecx
+; X86-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: andl %edx, %edi
+; X86-NOSSE-NEXT: shrl $2, %ecx
+; X86-NOSSE-NEXT: andl %edx, %ecx
+; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: movl %ecx, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %ecx, %edi
+; X86-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %ecx, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl %esi, %ebx
+; X86-NOSSE-NEXT: subl %ebx, %eax
+; X86-NOSSE-NEXT: movl %eax, %esi
+; X86-NOSSE-NEXT: andl %edx, %esi
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl %edx, %eax
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %eax, %edx
+; X86-NOSSE-NEXT: shrl $4, %edx
+; X86-NOSSE-NEXT: addl %eax, %edx
+; X86-NOSSE-NEXT: andl %ecx, %edx
+; X86-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: addl %edi, %eax
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: popl %esi
+; X86-NOSSE-NEXT: popl %edi
+; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: retl
;
; X64-LABEL: cnt64_pgso:
; X64: # %bb.0:
@@ -1160,154 +1160,154 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
; X64-NEXT: shrq $56, %rax
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt64_pgso:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: addl %ecx, %eax
-; X32-POPCNT-NEXT: xorl %edx, %edx
-; X32-POPCNT-NEXT: retl
+; X86-POPCNT-LABEL: cnt64_pgso:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: addl %ecx, %eax
+; X86-POPCNT-NEXT: xorl %edx, %edx
+; X86-POPCNT-NEXT: retl
;
; X64-POPCNT-LABEL: cnt64_pgso:
; X64-POPCNT: # %bb.0:
; X64-POPCNT-NEXT: popcntq %rdi, %rax
; X64-POPCNT-NEXT: retq
;
-; X32-SSE2-LABEL: cnt64_pgso:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $1, %xmm1
-; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE2-NEXT: psubb %xmm1, %xmm0
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm2
-; X32-SSE2-NEXT: pand %xmm1, %xmm2
-; X32-SSE2-NEXT: psrlw $2, %xmm0
-; X32-SSE2-NEXT: pand %xmm1, %xmm0
-; X32-SSE2-NEXT: paddb %xmm2, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $4, %xmm1
-; X32-SSE2-NEXT: paddb %xmm0, %xmm1
-; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
-; X32-SSE2-NEXT: pxor %xmm0, %xmm0
-; X32-SSE2-NEXT: psadbw %xmm1, %xmm0
-; X32-SSE2-NEXT: movd %xmm0, %eax
-; X32-SSE2-NEXT: xorl %edx, %edx
-; X32-SSE2-NEXT: retl
+; X86-SSE2-LABEL: cnt64_pgso:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm2, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm0, %xmm1
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: pxor %xmm0, %xmm0
+; X86-SSE2-NEXT: psadbw %xmm1, %xmm0
+; X86-SSE2-NEXT: movd %xmm0, %eax
+; X86-SSE2-NEXT: xorl %edx, %edx
+; X86-SSE2-NEXT: retl
;
-; X32-SSSE3-LABEL: cnt64_pgso:
-; X32-SSSE3: # %bb.0:
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
-; X32-SSSE3-NEXT: psrlw $4, %xmm1
-; X32-SSSE3-NEXT: pand %xmm0, %xmm1
-; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3
-; X32-SSSE3-NEXT: paddb %xmm4, %xmm3
-; X32-SSSE3-NEXT: pxor %xmm0, %xmm0
-; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0
-; X32-SSSE3-NEXT: movd %xmm0, %eax
-; X32-SSSE3-NEXT: xorl %edx, %edx
-; X32-SSSE3-NEXT: retl
+; X86-SSSE3-LABEL: cnt64_pgso:
+; X86-SSSE3: # %bb.0:
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
+; X86-SSSE3-NEXT: psrlw $4, %xmm1
+; X86-SSSE3-NEXT: pand %xmm0, %xmm1
+; X86-SSSE3-NEXT: pshufb %xmm1, %xmm3
+; X86-SSSE3-NEXT: paddb %xmm4, %xmm3
+; X86-SSSE3-NEXT: pxor %xmm0, %xmm0
+; X86-SSSE3-NEXT: psadbw %xmm3, %xmm0
+; X86-SSSE3-NEXT: movd %xmm0, %eax
+; X86-SSSE3-NEXT: xorl %edx, %edx
+; X86-SSSE3-NEXT: retl
%cnt = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %cnt
}
define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
-; X32-NOSSE-LABEL: cnt128_pgso:
-; X32-NOSSE: # %bb.0:
-; X32-NOSSE-NEXT: pushl %ebp
-; X32-NOSSE-NEXT: pushl %ebx
-; X32-NOSSE-NEXT: pushl %edi
-; X32-NOSSE-NEXT: pushl %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X32-NOSSE-NEXT: movl %ebx, %ecx
-; X32-NOSSE-NEXT: shrl %ecx
-; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: andl %edi, %ecx
-; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
-; X32-NOSSE-NEXT: subl %ecx, %ebx
-; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
-; X32-NOSSE-NEXT: movl %ebx, %ebp
-; X32-NOSSE-NEXT: andl %ecx, %ebp
-; X32-NOSSE-NEXT: shrl $2, %ebx
-; X32-NOSSE-NEXT: andl %ecx, %ebx
-; X32-NOSSE-NEXT: addl %ebp, %ebx
-; X32-NOSSE-NEXT: movl %ebx, %ebp
-; X32-NOSSE-NEXT: shrl $4, %ebp
-; X32-NOSSE-NEXT: addl %ebx, %ebp
-; X32-NOSSE-NEXT: movl %eax, %ebx
-; X32-NOSSE-NEXT: shrl %ebx
-; X32-NOSSE-NEXT: andl %edi, %ebx
-; X32-NOSSE-NEXT: subl %ebx, %eax
-; X32-NOSSE-NEXT: movl %eax, %ebx
-; X32-NOSSE-NEXT: andl %ecx, %ebx
-; X32-NOSSE-NEXT: shrl $2, %eax
-; X32-NOSSE-NEXT: andl %ecx, %eax
-; X32-NOSSE-NEXT: addl %ebx, %eax
-; X32-NOSSE-NEXT: movl %eax, %edi
-; X32-NOSSE-NEXT: shrl $4, %edi
-; X32-NOSSE-NEXT: addl %eax, %edi
-; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
-; X32-NOSSE-NEXT: andl %ebx, %ebp
-; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: andl %ebx, %edi
-; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %edi
-; X32-NOSSE-NEXT: addl %eax, %edi
-; X32-NOSSE-NEXT: movl %esi, %eax
-; X32-NOSSE-NEXT: shrl %eax
-; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
-; X32-NOSSE-NEXT: andl %ebp, %eax
-; X32-NOSSE-NEXT: subl %eax, %esi
-; X32-NOSSE-NEXT: movl %esi, %eax
-; X32-NOSSE-NEXT: andl %ecx, %eax
-; X32-NOSSE-NEXT: shrl $2, %esi
-; X32-NOSSE-NEXT: andl %ecx, %esi
-; X32-NOSSE-NEXT: addl %eax, %esi
-; X32-NOSSE-NEXT: movl %esi, %eax
-; X32-NOSSE-NEXT: shrl $4, %eax
-; X32-NOSSE-NEXT: addl %esi, %eax
-; X32-NOSSE-NEXT: movl %edx, %esi
-; X32-NOSSE-NEXT: shrl %esi
-; X32-NOSSE-NEXT: andl %ebp, %esi
-; X32-NOSSE-NEXT: subl %esi, %edx
-; X32-NOSSE-NEXT: movl %edx, %esi
-; X32-NOSSE-NEXT: andl %ecx, %esi
-; X32-NOSSE-NEXT: shrl $2, %edx
-; X32-NOSSE-NEXT: andl %ecx, %edx
-; X32-NOSSE-NEXT: addl %esi, %edx
-; X32-NOSSE-NEXT: movl %edx, %ecx
-; X32-NOSSE-NEXT: shrl $4, %ecx
-; X32-NOSSE-NEXT: addl %edx, %ecx
-; X32-NOSSE-NEXT: andl %ebx, %eax
-; X32-NOSSE-NEXT: andl %ebx, %ecx
-; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %eax
-; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
-; X32-NOSSE-NEXT: shrl $24, %ecx
-; X32-NOSSE-NEXT: addl %eax, %ecx
-; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOSSE-NEXT: addl %edi, %ecx
-; X32-NOSSE-NEXT: xorl %edx, %edx
-; X32-NOSSE-NEXT: movl %edx, 12(%eax)
-; X32-NOSSE-NEXT: movl %edx, 8(%eax)
-; X32-NOSSE-NEXT: movl %edx, 4(%eax)
-; X32-NOSSE-NEXT: movl %ecx, (%eax)
-; X32-NOSSE-NEXT: popl %esi
-; X32-NOSSE-NEXT: popl %edi
-; X32-NOSSE-NEXT: popl %ebx
-; X32-NOSSE-NEXT: popl %ebp
-; X32-NOSSE-NEXT: retl $4
+; X86-NOSSE-LABEL: cnt128_pgso:
+; X86-NOSSE: # %bb.0:
+; X86-NOSSE-NEXT: pushl %ebp
+; X86-NOSSE-NEXT: pushl %ebx
+; X86-NOSSE-NEXT: pushl %edi
+; X86-NOSSE-NEXT: pushl %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ecx
+; X86-NOSSE-NEXT: shrl %ecx
+; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %edi, %ecx
+; X86-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555
+; X86-NOSSE-NEXT: subl %ecx, %ebx
+; X86-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333
+; X86-NOSSE-NEXT: movl %ebx, %ebp
+; X86-NOSSE-NEXT: andl %ecx, %ebp
+; X86-NOSSE-NEXT: shrl $2, %ebx
+; X86-NOSSE-NEXT: andl %ecx, %ebx
+; X86-NOSSE-NEXT: addl %ebp, %ebx
+; X86-NOSSE-NEXT: movl %ebx, %ebp
+; X86-NOSSE-NEXT: shrl $4, %ebp
+; X86-NOSSE-NEXT: addl %ebx, %ebp
+; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: shrl %ebx
+; X86-NOSSE-NEXT: andl %edi, %ebx
+; X86-NOSSE-NEXT: subl %ebx, %eax
+; X86-NOSSE-NEXT: movl %eax, %ebx
+; X86-NOSSE-NEXT: andl %ecx, %ebx
+; X86-NOSSE-NEXT: shrl $2, %eax
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: addl %ebx, %eax
+; X86-NOSSE-NEXT: movl %eax, %edi
+; X86-NOSSE-NEXT: shrl $4, %edi
+; X86-NOSSE-NEXT: addl %eax, %edi
+; X86-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F
+; X86-NOSSE-NEXT: andl %ebx, %ebp
+; X86-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: andl %ebx, %edi
+; X86-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %edi
+; X86-NOSSE-NEXT: addl %eax, %edi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: shrl %eax
+; X86-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555
+; X86-NOSSE-NEXT: andl %ebp, %eax
+; X86-NOSSE-NEXT: subl %eax, %esi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: andl %ecx, %eax
+; X86-NOSSE-NEXT: shrl $2, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: addl %eax, %esi
+; X86-NOSSE-NEXT: movl %esi, %eax
+; X86-NOSSE-NEXT: shrl $4, %eax
+; X86-NOSSE-NEXT: addl %esi, %eax
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: shrl %esi
+; X86-NOSSE-NEXT: andl %ebp, %esi
+; X86-NOSSE-NEXT: subl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %esi
+; X86-NOSSE-NEXT: andl %ecx, %esi
+; X86-NOSSE-NEXT: shrl $2, %edx
+; X86-NOSSE-NEXT: andl %ecx, %edx
+; X86-NOSSE-NEXT: addl %esi, %edx
+; X86-NOSSE-NEXT: movl %edx, %ecx
+; X86-NOSSE-NEXT: shrl $4, %ecx
+; X86-NOSSE-NEXT: addl %edx, %ecx
+; X86-NOSSE-NEXT: andl %ebx, %eax
+; X86-NOSSE-NEXT: andl %ebx, %ecx
+; X86-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %eax
+; X86-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101
+; X86-NOSSE-NEXT: shrl $24, %ecx
+; X86-NOSSE-NEXT: addl %eax, %ecx
+; X86-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOSSE-NEXT: addl %edi, %ecx
+; X86-NOSSE-NEXT: xorl %edx, %edx
+; X86-NOSSE-NEXT: movl %edx, 12(%eax)
+; X86-NOSSE-NEXT: movl %edx, 8(%eax)
+; X86-NOSSE-NEXT: movl %edx, 4(%eax)
+; X86-NOSSE-NEXT: movl %ecx, (%eax)
+; X86-NOSSE-NEXT: popl %esi
+; X86-NOSSE-NEXT: popl %edi
+; X86-NOSSE-NEXT: popl %ebx
+; X86-NOSSE-NEXT: popl %ebp
+; X86-NOSSE-NEXT: retl $4
;
; X64-LABEL: cnt128_pgso:
; X64: # %bb.0:
@@ -1349,24 +1349,24 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X64-NEXT: xorl %edx, %edx
; X64-NEXT: retq
;
-; X32-POPCNT-LABEL: cnt128_pgso:
-; X32-POPCNT: # %bb.0:
-; X32-POPCNT-NEXT: pushl %esi
-; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
-; X32-POPCNT-NEXT: addl %ecx, %edx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
-; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
-; X32-POPCNT-NEXT: addl %ecx, %esi
-; X32-POPCNT-NEXT: addl %edx, %esi
-; X32-POPCNT-NEXT: xorl %ecx, %ecx
-; X32-POPCNT-NEXT: movl %ecx, 12(%eax)
-; X32-POPCNT-NEXT: movl %ecx, 8(%eax)
-; X32-POPCNT-NEXT: movl %ecx, 4(%eax)
-; X32-POPCNT-NEXT: movl %esi, (%eax)
-; X32-POPCNT-NEXT: popl %esi
-; X32-POPCNT-NEXT: retl $4
+; X86-POPCNT-LABEL: cnt128_pgso:
+; X86-POPCNT: # %bb.0:
+; X86-POPCNT-NEXT: pushl %esi
+; X86-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx
+; X86-POPCNT-NEXT: addl %ecx, %edx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx
+; X86-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi
+; X86-POPCNT-NEXT: addl %ecx, %esi
+; X86-POPCNT-NEXT: addl %edx, %esi
+; X86-POPCNT-NEXT: xorl %ecx, %ecx
+; X86-POPCNT-NEXT: movl %ecx, 12(%eax)
+; X86-POPCNT-NEXT: movl %ecx, 8(%eax)
+; X86-POPCNT-NEXT: movl %ecx, 4(%eax)
+; X86-POPCNT-NEXT: movl %esi, (%eax)
+; X86-POPCNT-NEXT: popl %esi
+; X86-POPCNT-NEXT: retl $4
;
; X64-POPCNT-LABEL: cnt128_pgso:
; X64-POPCNT: # %bb.0:
@@ -1376,89 +1376,89 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X64-POPCNT-NEXT: xorl %edx, %edx
; X64-POPCNT-NEXT: retq
;
-; X32-SSE2-LABEL: cnt128_pgso:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm1
-; X32-SSE2-NEXT: psrlw $1, %xmm1
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
-; X32-SSE2-NEXT: pand %xmm2, %xmm1
-; X32-SSE2-NEXT: psubb %xmm1, %xmm0
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: pand %xmm1, %xmm3
-; X32-SSE2-NEXT: psrlw $2, %xmm0
-; X32-SSE2-NEXT: pand %xmm1, %xmm0
-; X32-SSE2-NEXT: paddb %xmm3, %xmm0
-; X32-SSE2-NEXT: movdqa %xmm0, %xmm3
-; X32-SSE2-NEXT: psrlw $4, %xmm3
-; X32-SSE2-NEXT: paddb %xmm0, %xmm3
-; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSE2-NEXT: pand %xmm0, %xmm3
-; X32-SSE2-NEXT: pxor %xmm4, %xmm4
-; X32-SSE2-NEXT: psadbw %xmm4, %xmm3
-; X32-SSE2-NEXT: movd %xmm3, %ecx
-; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm5
-; X32-SSE2-NEXT: psrlw $1, %xmm5
-; X32-SSE2-NEXT: pand %xmm2, %xmm5
-; X32-SSE2-NEXT: psubb %xmm5, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm2
-; X32-SSE2-NEXT: pand %xmm1, %xmm2
-; X32-SSE2-NEXT: psrlw $2, %xmm3
-; X32-SSE2-NEXT: pand %xmm1, %xmm3
-; X32-SSE2-NEXT: paddb %xmm2, %xmm3
-; X32-SSE2-NEXT: movdqa %xmm3, %xmm1
-; X32-SSE2-NEXT: psrlw $4, %xmm1
-; X32-SSE2-NEXT: paddb %xmm3, %xmm1
-; X32-SSE2-NEXT: pand %xmm0, %xmm1
-; X32-SSE2-NEXT: psadbw %xmm4, %xmm1
-; X32-SSE2-NEXT: movd %xmm1, %edx
-; X32-SSE2-NEXT: addl %ecx, %edx
-; X32-SSE2-NEXT: xorl %ecx, %ecx
-; X32-SSE2-NEXT: movl %ecx, 12(%eax)
-; X32-SSE2-NEXT: movl %ecx, 8(%eax)
-; X32-SSE2-NEXT: movl %ecx, 4(%eax)
-; X32-SSE2-NEXT: movl %edx, (%eax)
-; X32-SSE2-NEXT: retl $4
+; X86-SSE2-LABEL: cnt128_pgso:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT: psrlw $1, %xmm1
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
+; X86-SSE2-NEXT: pand %xmm2, %xmm1
+; X86-SSE2-NEXT: psubb %xmm1, %xmm0
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: psrlw $2, %xmm0
+; X86-SSE2-NEXT: pand %xmm1, %xmm0
+; X86-SSE2-NEXT: paddb %xmm3, %xmm0
+; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
+; X86-SSE2-NEXT: psrlw $4, %xmm3
+; X86-SSE2-NEXT: paddb %xmm0, %xmm3
+; X86-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSE2-NEXT: pand %xmm0, %xmm3
+; X86-SSE2-NEXT: pxor %xmm4, %xmm4
+; X86-SSE2-NEXT: psadbw %xmm4, %xmm3
+; X86-SSE2-NEXT: movd %xmm3, %ecx
+; X86-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
+; X86-SSE2-NEXT: psrlw $1, %xmm5
+; X86-SSE2-NEXT: pand %xmm2, %xmm5
+; X86-SSE2-NEXT: psubb %xmm5, %xmm3
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
+; X86-SSE2-NEXT: pand %xmm1, %xmm2
+; X86-SSE2-NEXT: psrlw $2, %xmm3
+; X86-SSE2-NEXT: pand %xmm1, %xmm3
+; X86-SSE2-NEXT: paddb %xmm2, %xmm3
+; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
+; X86-SSE2-NEXT: psrlw $4, %xmm1
+; X86-SSE2-NEXT: paddb %xmm3, %xmm1
+; X86-SSE2-NEXT: pand %xmm0, %xmm1
+; X86-SSE2-NEXT: psadbw %xmm4, %xmm1
+; X86-SSE2-NEXT: movd %xmm1, %edx
+; X86-SSE2-NEXT: addl %ecx, %edx
+; X86-SSE2-NEXT: xorl %ecx, %ecx
+; X86-SSE2-NEXT: movl %ecx, 12(%eax)
+; X86-SSE2-NEXT: movl %ecx, 8(%eax)
+; X86-SSE2-NEXT: movl %ecx, 4(%eax)
+; X86-SSE2-NEXT: movl %edx, (%eax)
+; X86-SSE2-NEXT: retl $4
;
-; X32-SSSE3-LABEL: cnt128_pgso:
-; X32-SSSE3: # %bb.0:
-; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4
-; X32-SSSE3-NEXT: psrlw $4, %xmm1
-; X32-SSSE3-NEXT: pand %xmm0, %xmm1
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2
-; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2
-; X32-SSSE3-NEXT: paddb %xmm4, %xmm2
-; X32-SSSE3-NEXT: pxor %xmm1, %xmm1
-; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2
-; X32-SSSE3-NEXT: movd %xmm2, %ecx
-; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
-; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4
-; X32-SSSE3-NEXT: pand %xmm0, %xmm4
-; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5
-; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5
-; X32-SSSE3-NEXT: psrlw $4, %xmm2
-; X32-SSSE3-NEXT: pand %xmm0, %xmm2
-; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3
-; X32-SSSE3-NEXT: paddb %xmm5, %xmm3
-; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3
-; X32-SSSE3-NEXT: movd %xmm3, %edx
-; X32-SSSE3-NEXT: addl %ecx, %edx
-; X32-SSSE3-NEXT: xorl %ecx, %ecx
-; X32-SSSE3-NEXT: movl %ecx, 12(%eax)
-; X32-SSSE3-NEXT: movl %ecx, 8(%eax)
-; X32-SSSE3-NEXT: movl %ecx, 4(%eax)
-; X32-SSSE3-NEXT: movl %edx, (%eax)
-; X32-SSSE3-NEXT: retl $4
+; X86-SSSE3-LABEL: cnt128_pgso:
+; X86-SSSE3: # %bb.0:
+; X86-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm1, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm4
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm4
+; X86-SSSE3-NEXT: psrlw $4, %xmm1
+; X86-SSSE3-NEXT: pand %xmm0, %xmm1
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm2
+; X86-SSSE3-NEXT: pshufb %xmm1, %xmm2
+; X86-SSSE3-NEXT: paddb %xmm4, %xmm2
+; X86-SSSE3-NEXT: pxor %xmm1, %xmm1
+; X86-SSSE3-NEXT: psadbw %xmm1, %xmm2
+; X86-SSSE3-NEXT: movd %xmm2, %ecx
+; X86-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
+; X86-SSSE3-NEXT: movdqa %xmm2, %xmm4
+; X86-SSSE3-NEXT: pand %xmm0, %xmm4
+; X86-SSSE3-NEXT: movdqa %xmm3, %xmm5
+; X86-SSSE3-NEXT: pshufb %xmm4, %xmm5
+; X86-SSSE3-NEXT: psrlw $4, %xmm2
+; X86-SSSE3-NEXT: pand %xmm0, %xmm2
+; X86-SSSE3-NEXT: pshufb %xmm2, %xmm3
+; X86-SSSE3-NEXT: paddb %xmm5, %xmm3
+; X86-SSSE3-NEXT: psadbw %xmm1, %xmm3
+; X86-SSSE3-NEXT: movd %xmm3, %edx
+; X86-SSSE3-NEXT: addl %ecx, %edx
+; X86-SSSE3-NEXT: xorl %ecx, %ecx
+; X86-SSSE3-NEXT: movl %ecx, 12(%eax)
+; X86-SSSE3-NEXT: movl %ecx, 8(%eax)
+; X86-SSSE3-NEXT: movl %ecx, 4(%eax)
+; X86-SSSE3-NEXT: movl %edx, (%eax)
+; X86-SSSE3-NEXT: retl $4
%cnt = tail call i128 @llvm.ctpop.i128(i128 %x)
ret i128 %cnt
}
More information about the llvm-commits mailing list