[llvm] 9410019 - [X86] Add i8 CTPOP lowering using i32 MUL (#79989)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 02:40:33 PST 2024
Author: Simon Pilgrim
Date: 2024-02-02T10:40:28Z
New Revision: 9410019ac977141bc73aee19690b5896ded59219
URL: https://github.com/llvm/llvm-project/commit/9410019ac977141bc73aee19690b5896ded59219
DIFF: https://github.com/llvm/llvm-project/commit/9410019ac977141bc73aee19690b5896ded59219.diff
LOG: [X86] Add i8 CTPOP lowering using i32 MUL (#79989)
This implements the first basic proposal from #79823; we can investigate improving support for other widths if we find further use cases.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/ctpop-combine.ll
llvm/test/CodeGen/X86/popcnt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 531e00862558c..de2df5c036f55 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -427,7 +427,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// on the dest that popcntl hasn't had since Cannon Lake.
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
- setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
+ setOperationAction(ISD::CTPOP , MVT::i8 , Custom);
setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
if (Subtarget.is64Bit())
@@ -30989,12 +30989,12 @@ static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL,
// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
// updated cost models in X86TTIImpl::getIntrinsicInstrCost.
-static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
+static SDValue LowerVectorCTPOP(SDValue Op, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
"Unknown CTPOP type to handle");
- SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
// TRUNC(CTPOP(ZEXT(X))) to make use of vXi32/vXi64 VPOPCNT instructions.
@@ -31035,9 +31035,27 @@ static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- assert(Op.getSimpleValueType().isVector() &&
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+
+ // i8 CTPOP - with efficient i32 MUL, then attempt multiply-mask-multiply.
+ if (VT == MVT::i8) {
+ SDValue Mask11 = DAG.getConstant(0x11111111U, DL, MVT::i32);
+ Op = DAG.getZExtOrTrunc(Op.getOperand(0), DL, MVT::i32);
+ Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op,
+ DAG.getConstant(0x08040201U, DL, MVT::i32));
+ Op = DAG.getNode(ISD::SRL, DL, MVT::i32, Op,
+ DAG.getShiftAmountConstant(3, MVT::i32, DL));
+ Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::MUL, DL, MVT::i32, Op, Mask11);
+ Op = DAG.getNode(ISD::SRL, DL, MVT::i32, Op,
+ DAG.getShiftAmountConstant(28, MVT::i32, DL));
+ return DAG.getZExtOrTrunc(Op, DL, VT);
+ }
+
+ assert(VT.isVector() &&
"We only do custom lowering for vector population count.");
- return LowerVectorCTPOP(Op, Subtarget, DAG);
+ return LowerVectorCTPOP(Op, DL, Subtarget, DAG);
}
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll
index fba44218e0572..73152e9f909cf 100644
--- a/llvm/test/CodeGen/X86/ctpop-combine.ll
+++ b/llvm/test/CodeGen/X86/ctpop-combine.ll
@@ -88,20 +88,13 @@ define i8 @test4(i8 %x) nounwind readnone {
;
; NO-POPCOUNT-LABEL: test4:
; NO-POPCOUNT: # %bb.0:
-; NO-POPCOUNT-NEXT: movl %edi, %ecx
-; NO-POPCOUNT-NEXT: andb $127, %cl
-; NO-POPCOUNT-NEXT: shrb %dil
-; NO-POPCOUNT-NEXT: andb $21, %dil
-; NO-POPCOUNT-NEXT: subb %dil, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: andb $51, %al
-; NO-POPCOUNT-NEXT: shrb $2, %cl
-; NO-POPCOUNT-NEXT: andb $51, %cl
-; NO-POPCOUNT-NEXT: addb %al, %cl
-; NO-POPCOUNT-NEXT: movl %ecx, %eax
-; NO-POPCOUNT-NEXT: shrb $4, %al
-; NO-POPCOUNT-NEXT: addb %cl, %al
-; NO-POPCOUNT-NEXT: andb $15, %al
+; NO-POPCOUNT-NEXT: andl $127, %edi
+; NO-POPCOUNT-NEXT: imull $134480385, %edi, %eax # imm = 0x8040201
+; NO-POPCOUNT-NEXT: shrl $3, %eax
+; NO-POPCOUNT-NEXT: andl $286331153, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; NO-POPCOUNT-NEXT: shrl $28, %eax
+; NO-POPCOUNT-NEXT: # kill: def $al killed $al killed $eax
; NO-POPCOUNT-NEXT: retq
%x2 = and i8 %x, 127
%count = tail call i8 @llvm.ctpop.i8(i8 %x2)
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll
index a9d77fd2c0a61..37c7b051de7b1 100644
--- a/llvm/test/CodeGen/X86/popcnt.ll
+++ b/llvm/test/CodeGen/X86/popcnt.ll
@@ -1,46 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s --check-prefixes=X86,X86-NOSSE
-; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X86-POPCNT
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefix=X64-POPCNT
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd | FileCheck %s --check-prefix=X64-NDD
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefixes=X64,X64-BASE
+; RUN: llc < %s -mtriple=i686-unknown -mattr=+popcnt | FileCheck %s --check-prefixes=X86-POPCNT
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+popcnt | FileCheck %s --check-prefixes=X64-POPCNT
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ndd | FileCheck %s --check-prefixes=X64,X64-NDD
; RUN: llc < %s -mtriple=i686-unknown -mattr=sse2 | FileCheck %s --check-prefixes=X86,X86-SSE2
; RUN: llc < %s -mtriple=i686-unknown -mattr=ssse3 | FileCheck %s --check-prefixes=X86,X86-SSSE3
define i8 @cnt8(i8 %x) nounwind readnone {
; X86-LABEL: cnt8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb %al
-; X86-NEXT: andb $85, %al
-; X86-NEXT: subb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: andb $51, %al
-; X86-NEXT: shrb $2, %cl
-; X86-NEXT: andb $51, %cl
-; X86-NEXT: addb %al, %cl
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shrb $4, %al
-; X86-NEXT: addb %cl, %al
-; X86-NEXT: andb $15, %al
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X86-NEXT: shrl $3, %eax
+; X86-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X86-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X86-NEXT: shrl $28, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: cnt8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb $85, %al
-; X64-NEXT: subb %al, %dil
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andb $51, %cl
-; X64-NEXT: shrb $2, %dil
-; X64-NEXT: andb $51, %dil
-; X64-NEXT: addb %dil, %cl
-; X64-NEXT: movl %ecx, %eax
-; X64-NEXT: shrb $4, %al
-; X64-NEXT: addb %cl, %al
-; X64-NEXT: andb $15, %al
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: imull $134480385, %eax, %eax # imm = 0x8040201
+; X64-NEXT: shrl $3, %eax
+; X64-NEXT: andl $286331153, %eax # imm = 0x11111111
+; X64-NEXT: imull $286331153, %eax, %eax # imm = 0x11111111
+; X64-NEXT: shrl $28, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
; X86-POPCNT-LABEL: cnt8:
@@ -56,20 +43,6 @@ define i8 @cnt8(i8 %x) nounwind readnone {
; X64-POPCNT-NEXT: popcntl %eax, %eax
; X64-POPCNT-NEXT: # kill: def $al killed $al killed $eax
; X64-POPCNT-NEXT: retq
-;
-; X64-NDD-LABEL: cnt8:
-; X64-NDD: # %bb.0:
-; X64-NDD-NEXT: shrb %dil, %al
-; X64-NDD-NEXT: andb $85, %al
-; X64-NDD-NEXT: subb %al, %dil, %al
-; X64-NDD-NEXT: andb $51, %al, %cl
-; X64-NDD-NEXT: shrb $2, %al
-; X64-NDD-NEXT: andb $51, %al
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: shrb $4, %al, %cl
-; X64-NDD-NEXT: addb %cl, %al
-; X64-NDD-NEXT: andb $15, %al
-; X64-NDD-NEXT: retq
%cnt = tail call i8 @llvm.ctpop.i8(i8 %x)
ret i8 %cnt
}
@@ -98,27 +71,27 @@ define i16 @cnt16(i16 %x) nounwind readnone {
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
-; X64-LABEL: cnt16:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $13107, %eax # imm = 0x3333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $13107, %edi # imm = 0x3333
-; X64-NEXT: addl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: addl %edi, %eax
-; X64-NEXT: andl $3855, %eax # imm = 0xF0F
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: shrl $8, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt16:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl %eax
+; X64-BASE-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-BASE-NEXT: subl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: andl $13107, %eax # imm = 0x3333
+; X64-BASE-NEXT: shrl $2, %edi
+; X64-BASE-NEXT: andl $13107, %edi # imm = 0x3333
+; X64-BASE-NEXT: addl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl $4, %eax
+; X64-BASE-NEXT: addl %edi, %eax
+; X64-BASE-NEXT: andl $3855, %eax # imm = 0xF0F
+; X64-BASE-NEXT: movl %eax, %ecx
+; X64-BASE-NEXT: shrl $8, %ecx
+; X64-BASE-NEXT: addl %eax, %ecx
+; X64-BASE-NEXT: movzbl %cl, %eax
+; X64-BASE-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt16:
; X86-POPCNT: # %bb.0:
@@ -176,24 +149,24 @@ define i32 @cnt32(i32 %x) nounwind readnone {
; X86-NEXT: shrl $24, %eax
; X86-NEXT: retl
;
-; X64-LABEL: cnt32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: addl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: addl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X64-NEXT: shrl $24, %eax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt32:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl %eax
+; X64-BASE-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-BASE-NEXT: subl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-BASE-NEXT: shrl $2, %edi
+; X64-BASE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-BASE-NEXT: addl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl $4, %eax
+; X64-BASE-NEXT: addl %edi, %eax
+; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-BASE-NEXT: shrl $24, %eax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt32:
; X86-POPCNT: # %bb.0:
@@ -263,28 +236,28 @@ define i64 @cnt64(i64 %x) nounwind readnone {
; X86-NOSSE-NEXT: xorl %edx, %edx
; X86-NOSSE-NEXT: retl
;
-; X64-LABEL: cnt64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: subq %rcx, %rdi
-; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: shrq $4, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; X64-NEXT: imulq %rdx, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt64:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: subq %rcx, %rdi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rdi, %rcx
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: movq %rax, %rcx
+; X64-BASE-NEXT: shrq $4, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rcx, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %rdx, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt64:
; X86-POPCNT: # %bb.0:
@@ -447,45 +420,45 @@ define i128 @cnt128(i128 %x) nounwind readnone {
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: retl $4
;
-; X64-LABEL: cnt128:
-; X64: # %bb.0:
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
-; X64-NEXT: andq %r8, %rax
-; X64-NEXT: subq %rax, %rsi
-; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rsi
-; X64-NEXT: andq %rcx, %rsi
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: shrq $4, %rdx
-; X64-NEXT: addq %rax, %rdx
-; X64-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rsi, %rdx
-; X64-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
-; X64-NEXT: imulq %r9, %rdx
-; X64-NEXT: shrq $56, %rdx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: andq %r8, %rax
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andq %rsi, %rax
-; X64-NEXT: imulq %r9, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: addq %rdx, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt128:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rsi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %r8, %rax
+; X64-BASE-NEXT: subq %rax, %rsi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rsi, %rax
+; X64-BASE-NEXT: andq %rcx, %rax
+; X64-BASE-NEXT: shrq $2, %rsi
+; X64-BASE-NEXT: andq %rcx, %rsi
+; X64-BASE-NEXT: addq %rsi, %rax
+; X64-BASE-NEXT: movq %rax, %rdx
+; X64-BASE-NEXT: shrq $4, %rdx
+; X64-BASE-NEXT: addq %rax, %rdx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rsi, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %r9, %rdx
+; X64-BASE-NEXT: shrq $56, %rdx
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: andq %r8, %rax
+; X64-BASE-NEXT: subq %rax, %rdi
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: andq %rcx, %rax
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movq %rcx, %rax
+; X64-BASE-NEXT: shrq $4, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: andq %rsi, %rax
+; X64-BASE-NEXT: imulq %r9, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: addq %rdx, %rax
+; X64-BASE-NEXT: xorl %edx, %edx
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt128:
; X86-POPCNT: # %bb.0:
@@ -671,28 +644,28 @@ define i64 @cnt64_noimplicitfloat(i64 %x) nounwind readnone noimplicitfloat {
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
-; X64-LABEL: cnt64_noimplicitfloat:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: subq %rcx, %rdi
-; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: shrq $4, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; X64-NEXT: imulq %rdx, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt64_noimplicitfloat:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: subq %rcx, %rdi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rdi, %rcx
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: movq %rax, %rcx
+; X64-BASE-NEXT: shrq $4, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rcx, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %rdx, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt64_noimplicitfloat:
; X86-POPCNT: # %bb.0:
@@ -752,25 +725,25 @@ define i32 @cnt32_optsize(i32 %x) nounwind readnone optsize {
; X86-NEXT: shrl $24, %eax
; X86-NEXT: retl
;
-; X64-LABEL: cnt32_optsize:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: andl %eax, %ecx
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl %eax, %edi
-; X64-NEXT: addl %ecx, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: addl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X64-NEXT: shrl $24, %eax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt32_optsize:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl %eax
+; X64-BASE-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-BASE-NEXT: subl %eax, %edi
+; X64-BASE-NEXT: movl $858993459, %eax # imm = 0x33333333
+; X64-BASE-NEXT: movl %edi, %ecx
+; X64-BASE-NEXT: andl %eax, %ecx
+; X64-BASE-NEXT: shrl $2, %edi
+; X64-BASE-NEXT: andl %eax, %edi
+; X64-BASE-NEXT: addl %ecx, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl $4, %eax
+; X64-BASE-NEXT: addl %edi, %eax
+; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-BASE-NEXT: shrl $24, %eax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt32_optsize:
; X86-POPCNT: # %bb.0:
@@ -850,28 +823,28 @@ define i64 @cnt64_optsize(i64 %x) nounwind readnone optsize {
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: retl
;
-; X64-LABEL: cnt64_optsize:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: subq %rcx, %rdi
-; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: shrq $4, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; X64-NEXT: imulq %rdx, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt64_optsize:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: subq %rcx, %rdi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rdi, %rcx
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: movq %rax, %rcx
+; X64-BASE-NEXT: shrq $4, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rcx, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %rdx, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt64_optsize:
; X86-POPCNT: # %bb.0:
@@ -1042,45 +1015,45 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize {
; X86-NOSSE-NEXT: popl %ebp
; X86-NOSSE-NEXT: retl $4
;
-; X64-LABEL: cnt128_optsize:
-; X64: # %bb.0:
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
-; X64-NEXT: andq %r8, %rax
-; X64-NEXT: subq %rax, %rsi
-; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rsi
-; X64-NEXT: andq %rcx, %rsi
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: shrq $4, %rdx
-; X64-NEXT: addq %rax, %rdx
-; X64-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rsi, %rdx
-; X64-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
-; X64-NEXT: imulq %r9, %rdx
-; X64-NEXT: shrq $56, %rdx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: andq %r8, %rax
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andq %rsi, %rax
-; X64-NEXT: imulq %r9, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: addq %rdx, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt128_optsize:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rsi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %r8, %rax
+; X64-BASE-NEXT: subq %rax, %rsi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rsi, %rax
+; X64-BASE-NEXT: andq %rcx, %rax
+; X64-BASE-NEXT: shrq $2, %rsi
+; X64-BASE-NEXT: andq %rcx, %rsi
+; X64-BASE-NEXT: addq %rsi, %rax
+; X64-BASE-NEXT: movq %rax, %rdx
+; X64-BASE-NEXT: shrq $4, %rdx
+; X64-BASE-NEXT: addq %rax, %rdx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rsi, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %r9, %rdx
+; X64-BASE-NEXT: shrq $56, %rdx
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: andq %r8, %rax
+; X64-BASE-NEXT: subq %rax, %rdi
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: andq %rcx, %rax
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movq %rcx, %rax
+; X64-BASE-NEXT: shrq $4, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: andq %rsi, %rax
+; X64-BASE-NEXT: imulq %r9, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: addq %rdx, %rax
+; X64-BASE-NEXT: xorl %edx, %edx
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt128_optsize:
; X86-POPCNT: # %bb.0:
@@ -1251,24 +1224,24 @@ define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 {
; X86-NEXT: shrl $24, %eax
; X86-NEXT: retl
;
-; X64-LABEL: cnt32_pgso:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: addl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: addl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X64-NEXT: shrl $24, %eax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt32_pgso:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl %eax
+; X64-BASE-NEXT: andl $1431655765, %eax # imm = 0x55555555
+; X64-BASE-NEXT: subl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-BASE-NEXT: shrl $2, %edi
+; X64-BASE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-BASE-NEXT: addl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl $4, %eax
+; X64-BASE-NEXT: addl %edi, %eax
+; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-BASE-NEXT: shrl $24, %eax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt32_pgso:
; X86-POPCNT: # %bb.0:
@@ -1338,28 +1311,28 @@ define i64 @cnt64_pgso(i64 %x) nounwind readnone !prof !14 {
; X86-NOSSE-NEXT: xorl %edx, %edx
; X86-NOSSE-NEXT: retl
;
-; X64-LABEL: cnt64_pgso:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: subq %rcx, %rdi
-; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: andq %rax, %rcx
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: movq %rax, %rcx
-; X64-NEXT: shrq $4, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rcx, %rdx
-; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
-; X64-NEXT: imulq %rdx, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt64_pgso:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: subq %rcx, %rdi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rdi, %rcx
+; X64-BASE-NEXT: andq %rax, %rcx
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: movq %rax, %rcx
+; X64-BASE-NEXT: shrq $4, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rdx # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rcx, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %rdx, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt64_pgso:
; X86-POPCNT: # %bb.0:
@@ -1523,45 +1496,45 @@ define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 {
; X86-NOSSE-NEXT: popl %ebx
; X86-NOSSE-NEXT: retl $4
;
-; X64-LABEL: cnt128_pgso:
-; X64: # %bb.0:
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
-; X64-NEXT: andq %r8, %rax
-; X64-NEXT: subq %rax, %rsi
-; X64-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rsi
-; X64-NEXT: andq %rcx, %rsi
-; X64-NEXT: addq %rsi, %rax
-; X64-NEXT: movq %rax, %rdx
-; X64-NEXT: shrq $4, %rdx
-; X64-NEXT: addq %rax, %rdx
-; X64-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT: andq %rsi, %rdx
-; X64-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
-; X64-NEXT: imulq %r9, %rdx
-; X64-NEXT: shrq $56, %rdx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: andq %r8, %rax
-; X64-NEXT: subq %rax, %rdi
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andq %rcx, %rax
-; X64-NEXT: shrq $2, %rdi
-; X64-NEXT: andq %rdi, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: movq %rcx, %rax
-; X64-NEXT: shrq $4, %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andq %rsi, %rax
-; X64-NEXT: imulq %r9, %rax
-; X64-NEXT: shrq $56, %rax
-; X64-NEXT: addq %rdx, %rax
-; X64-NEXT: xorl %edx, %edx
-; X64-NEXT: retq
+; X64-BASE-LABEL: cnt128_pgso:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movq %rsi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555
+; X64-BASE-NEXT: andq %r8, %rax
+; X64-BASE-NEXT: subq %rax, %rsi
+; X64-BASE-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333
+; X64-BASE-NEXT: movq %rsi, %rax
+; X64-BASE-NEXT: andq %rcx, %rax
+; X64-BASE-NEXT: shrq $2, %rsi
+; X64-BASE-NEXT: andq %rcx, %rsi
+; X64-BASE-NEXT: addq %rsi, %rax
+; X64-BASE-NEXT: movq %rax, %rdx
+; X64-BASE-NEXT: shrq $4, %rdx
+; X64-BASE-NEXT: addq %rax, %rdx
+; X64-BASE-NEXT: movabsq $1085102592571150095, %rsi # imm = 0xF0F0F0F0F0F0F0F
+; X64-BASE-NEXT: andq %rsi, %rdx
+; X64-BASE-NEXT: movabsq $72340172838076673, %r9 # imm = 0x101010101010101
+; X64-BASE-NEXT: imulq %r9, %rdx
+; X64-BASE-NEXT: shrq $56, %rdx
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: shrq %rax
+; X64-BASE-NEXT: andq %r8, %rax
+; X64-BASE-NEXT: subq %rax, %rdi
+; X64-BASE-NEXT: movq %rdi, %rax
+; X64-BASE-NEXT: andq %rcx, %rax
+; X64-BASE-NEXT: shrq $2, %rdi
+; X64-BASE-NEXT: andq %rdi, %rcx
+; X64-BASE-NEXT: addq %rax, %rcx
+; X64-BASE-NEXT: movq %rcx, %rax
+; X64-BASE-NEXT: shrq $4, %rax
+; X64-BASE-NEXT: addq %rcx, %rax
+; X64-BASE-NEXT: andq %rsi, %rax
+; X64-BASE-NEXT: imulq %r9, %rax
+; X64-BASE-NEXT: shrq $56, %rax
+; X64-BASE-NEXT: addq %rdx, %rax
+; X64-BASE-NEXT: xorl %edx, %edx
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: cnt128_pgso:
; X86-POPCNT: # %bb.0:
@@ -1732,24 +1705,24 @@ define i32 @popcount_zext_i32(i16 zeroext %x) {
; X86-NEXT: shrl $24, %eax
; X86-NEXT: retl
;
-; X64-LABEL: popcount_zext_i32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $858993459, %edi # imm = 0x33333333
-; X64-NEXT: addl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: addl %edi, %eax
-; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
-; X64-NEXT: shrl $24, %eax
-; X64-NEXT: retq
+; X64-BASE-LABEL: popcount_zext_i32:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl %eax
+; X64-BASE-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-BASE-NEXT: subl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: andl $858993459, %eax # imm = 0x33333333
+; X64-BASE-NEXT: shrl $2, %edi
+; X64-BASE-NEXT: andl $858993459, %edi # imm = 0x33333333
+; X64-BASE-NEXT: addl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl $4, %eax
+; X64-BASE-NEXT: addl %edi, %eax
+; X64-BASE-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-BASE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
+; X64-BASE-NEXT: shrl $24, %eax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: popcount_zext_i32:
; X86-POPCNT: # %bb.0:
@@ -1805,26 +1778,26 @@ define i32 @popcount_i16_zext(i16 zeroext %x) {
; X86-NEXT: movzbl %al, %eax
; X86-NEXT: retl
;
-; X64-LABEL: popcount_i16_zext:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl %eax
-; X64-NEXT: andl $21845, %eax # imm = 0x5555
-; X64-NEXT: subl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $13107, %eax # imm = 0x3333
-; X64-NEXT: shrl $2, %edi
-; X64-NEXT: andl $13107, %edi # imm = 0x3333
-; X64-NEXT: addl %eax, %edi
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrl $4, %eax
-; X64-NEXT: addl %edi, %eax
-; X64-NEXT: andl $3855, %eax # imm = 0xF0F
-; X64-NEXT: movl %eax, %ecx
-; X64-NEXT: shrl $8, %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: movzbl %cl, %eax
-; X64-NEXT: retq
+; X64-BASE-LABEL: popcount_i16_zext:
+; X64-BASE: # %bb.0:
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl %eax
+; X64-BASE-NEXT: andl $21845, %eax # imm = 0x5555
+; X64-BASE-NEXT: subl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: andl $13107, %eax # imm = 0x3333
+; X64-BASE-NEXT: shrl $2, %edi
+; X64-BASE-NEXT: andl $13107, %edi # imm = 0x3333
+; X64-BASE-NEXT: addl %eax, %edi
+; X64-BASE-NEXT: movl %edi, %eax
+; X64-BASE-NEXT: shrl $4, %eax
+; X64-BASE-NEXT: addl %edi, %eax
+; X64-BASE-NEXT: andl $3855, %eax # imm = 0xF0F
+; X64-BASE-NEXT: movl %eax, %ecx
+; X64-BASE-NEXT: shrl $8, %ecx
+; X64-BASE-NEXT: addl %eax, %ecx
+; X64-BASE-NEXT: movzbl %cl, %eax
+; X64-BASE-NEXT: retq
;
; X86-POPCNT-LABEL: popcount_i16_zext:
; X86-POPCNT: # %bb.0:
More information about the llvm-commits mailing list