[llvm] 9e09c31 - [LoongArch] Add codegen support for ISD::CTPOP, ISD::CTTZ and ISD::CTLZ
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 11 23:15:48 PDT 2022
Author: gonglingqin
Date: 2022-08-12T14:15:30+08:00
New Revision: 9e09c3186ed287fc9369faa09094473d50e3c38e
URL: https://github.com/llvm/llvm-project/commit/9e09c3186ed287fc9369faa09094473d50e3c38e
DIFF: https://github.com/llvm/llvm-project/commit/9e09c3186ed287fc9369faa09094473d50e3c38e.diff
LOG: [LoongArch] Add codegen support for ISD::CTPOP, ISD::CTTZ and ISD::CTLZ
Differential Revision: https://reviews.llvm.org/D131550
Added:
llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index fcb197a0fac71..ee0684634f912 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -53,6 +53,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
setOperationAction(ISD::ROTL, GRLenVT, Expand);
+ setOperationAction(ISD::CTPOP, GRLenVT, Expand);
setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
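
Note on the Expand action above: with no CTPOP handling beyond this, the generic legalizer synthesizes popcount from shifts, masks, and a multiply, which is exactly the sequence visible in the test_ctpop_i32 output below. A minimal C++ sketch of the equivalent bit-twiddling (illustrative only, not code from this patch):

  #include <cstdint>

  // Classic SWAR popcount, matching the constants 0x55555555, 0x33333333,
  // 0x0F0F0F0F and 0x01010101 materialized in the generated code:
  uint32_t popcount32(uint32_t x) {
    x = x - ((x >> 1) & 0x55555555u);                 // 2-bit partial sums
    x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u); // 4-bit partial sums
    x = (x + (x >> 4)) & 0x0F0F0F0Fu;                 // 8-bit partial sums
    return (x * 0x01010101u) >> 24;                   // sum bytes, keep top byte
  }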
@@ -68,6 +69,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
setOperationAction(ISD::ROTR, MVT::i32, Custom);
setOperationAction(ISD::ROTL, MVT::i32, Custom);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTLZ, MVT::i32, Custom);
if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
}
@@ -377,6 +380,10 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
return LoongArchISD::ROTR_W;
case ISD::ROTL:
return LoongArchISD::ROTL_W;
+ case ISD::CTTZ:
+ return LoongArchISD::CTZ_W;
+ case ISD::CTLZ:
+ return LoongArchISD::CLZ_W;
}
}
@@ -385,14 +392,31 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
// otherwise be promoted to i64, making it difficult to select the
// SLL_W/.../*W later one because the fact the operation was originally of
// type i8/i16/i32 is lost.
-static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
+static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
unsigned ExtOpc = ISD::ANY_EXTEND) {
SDLoc DL(N);
LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
- SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
- SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
- SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
- // ReplaceNodeResults requires we maintain the same type for the return value.
+ SDValue NewOp0, NewRes;
+
+ switch (NumOp) {
+ default:
+ llvm_unreachable("Unexpected NumOp");
+ case 1: {
+ NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+ NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
+ break;
+ }
+ case 2: {
+ NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
+ SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
+ NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
+ break;
+ }
+  // TODO: Handle more NumOp.
+ }
+
+ // ReplaceNodeResults requires we maintain the same type for the return
+ // value.
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
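
The W-form nodes are safe to use this way because, like the underlying *.w instructions, they depend only on bits [31:0] of the source register, so an ANY_EXTEND of the operand followed by a TRUNCATE of the result preserves the i32 semantics. A small C++20 model of the assumed CLZ_W behavior (illustrative, not from this patch):

  #include <bit>
  #include <cstdint>

  // Assumed semantics of clz.w on LA64: count leading zeros of the low
  // 32 bits of the register; the upper half is ignored.
  uint64_t clz_w(uint64_t rj) {
    return std::countl_zero(static_cast<uint32_t>(rj));
  }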
@@ -409,14 +433,14 @@ void LoongArchTargetLowering::ReplaceNodeResults(
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() != ISD::Constant) {
- Results.push_back(customLegalizeToWOp(N, DAG));
+ Results.push_back(customLegalizeToWOp(N, DAG, 2));
break;
}
break;
case ISD::ROTL:
ConstantSDNode *CN;
if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
- Results.push_back(customLegalizeToWOp(N, DAG));
+ Results.push_back(customLegalizeToWOp(N, DAG, 2));
break;
}
break;
@@ -494,6 +518,13 @@ void LoongArchTargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
break;
}
+ case ISD::CTLZ:
+ case ISD::CTTZ: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+ Results.push_back(customLegalizeToWOp(N, DAG, 1));
+ break;
+ }
}
}
@@ -946,6 +977,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BITREV_W)
NODE_NAME_CASE(ROTR_W)
NODE_NAME_CASE(ROTL_W)
+ NODE_NAME_CASE(CLZ_W)
+ NODE_NAME_CASE(CTZ_W)
}
#undef NODE_NAME_CASE
return nullptr;
@@ -1682,3 +1715,7 @@ bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
return false;
return (Imm.isZero() || Imm.isExactlyValue(+1.0));
}
+
+bool LoongArchTargetLowering::isCheapToSpeculateCttz() const { return true; }
+
+bool LoongArchTargetLowering::isCheapToSpeculateCtlz() const { return true; }
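
Returning true from both hooks tells the middle end that ctz/clz are single cheap instructions on this target, so the usual zero-guarded idiom can be speculated and turned branchless. A hypothetical C++ source pattern showing what this enables (not code from this patch):

  #include <cstdint>

  // With cheap speculation, __builtin_ctz may be evaluated unconditionally
  // and the zero case handled by a select instead of a branch:
  unsigned safe_cttz(uint32_t x) {
    return x == 0 ? 32u : static_cast<unsigned>(__builtin_ctz(x));
  }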
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index a10235d3b964e..7f5a29d8ca0db 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -44,6 +44,10 @@ enum NodeType : unsigned {
FTINT,
+ // Bit counting operations
+ CLZ_W,
+ CTZ_W,
+
BSTRINS,
BSTRPICK,
@@ -92,6 +96,8 @@ class LoongArchTargetLowering : public TargetLowering {
SelectionDAG &DAG) const override;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ bool isCheapToSpeculateCttz() const override;
+ bool isCheapToSpeculateCtlz() const override;
private:
/// Target-specific function used to lower LoongArch calling conventions.
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 33502141345ae..ae4e73211ec7f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -61,6 +61,8 @@ def loongarch_revb_2h : SDNode<"LoongArchISD::REVB_2H", SDTUnaryOp>;
def loongarch_revb_2w : SDNode<"LoongArchISD::REVB_2W", SDTUnaryOp>;
def loongarch_bitrev_4b : SDNode<"LoongArchISD::BITREV_4B", SDTUnaryOp>;
def loongarch_bitrev_w : SDNode<"LoongArchISD::BITREV_W", SDTUnaryOp>;
+def loongarch_clzw : SDNode<"LoongArchISD::CLZ_W", SDTIntBitCountUnaryOp>;
+def loongarch_ctzw : SDNode<"LoongArchISD::CTZ_W", SDTIntBitCountUnaryOp>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -591,6 +593,8 @@ class PatGprGpr<SDPatternOperator OpNode, LAInst Inst>
: Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>;
class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst>
: Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>;
+class PatGpr<SDPatternOperator OpNode, LAInst Inst>
+ : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>;
class PatGprImm<SDPatternOperator OpNode, LAInst Inst, Operand ImmOpnd>
: Pat<(OpNode GPR:$rj, ImmOpnd:$imm),
@@ -686,6 +690,26 @@ def : Pat<(not (or GPR:$rj, GPR:$rk)), (NOR GPR:$rj, GPR:$rk)>;
def : Pat<(or GPR:$rj, (not GPR:$rk)), (ORN GPR:$rj, GPR:$rk)>;
def : Pat<(and GPR:$rj, (not GPR:$rk)), (ANDN GPR:$rj, GPR:$rk)>;
+/// Bit counting operations
+
+let Predicates = [IsLA64] in {
+def : PatGpr<ctlz, CLZ_D>;
+def : PatGpr<cttz, CTZ_D>;
+def : Pat<(ctlz (not GPR:$rj)), (CLO_D GPR:$rj)>;
+def : Pat<(cttz (not GPR:$rj)), (CTO_D GPR:$rj)>;
+def : PatGpr<loongarch_clzw, CLZ_W>;
+def : PatGpr<loongarch_ctzw, CTZ_W>;
+def : Pat<(loongarch_clzw (not GPR:$rj)), (CLO_W GPR:$rj)>;
+def : Pat<(loongarch_ctzw (not GPR:$rj)), (CTO_W GPR:$rj)>;
+} // Predicates = [IsLA64]
+
+let Predicates = [IsLA32] in {
+def : PatGpr<ctlz, CLZ_W>;
+def : PatGpr<cttz, CTZ_W>;
+def : Pat<(ctlz (not GPR:$rj)), (CLO_W GPR:$rj)>;
+def : Pat<(cttz (not GPR:$rj)), (CTO_W GPR:$rj)>;
+} // Predicates = [IsLA32]
+
/// FrameIndex calculations
let Predicates = [IsLA32] in {
def : Pat<(AddLike (i32 BaseAddr:$rj), simm12:$imm12),
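
The (ctlz (not ...)) and (cttz (not ...)) patterns added above select the count-leading-ones and count-trailing-ones instructions. C++20 equivalents of what CLO.W and CTO.W compute (illustrative sketch):

  #include <bit>
  #include <cstdint>

  uint32_t clo32(uint32_t x) { return std::countl_zero(~x); } // clo.w semantics
  uint32_t cto32(uint32_t x) { return std::countr_zero(~x); } // cto.w semantics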
diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
new file mode 100644
index 0000000000000..fa4fda9b8972b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll
@@ -0,0 +1,514 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+declare i8 @llvm.ctlz.i8(i8, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i8 @llvm.ctpop.i8(i8)
+declare i16 @llvm.ctpop.i16(i16)
+declare i32 @llvm.ctpop.i32(i32)
+declare i64 @llvm.ctpop.i64(i64)
+declare i8 @llvm.cttz.i8(i8, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i64 @llvm.cttz.i64(i64, i1)
+
+define i8 @test_ctlz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_ctlz_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: andi $a0, $a0, 255
+; LA32-NEXT: clz.w $a0, $a0
+; LA32-NEXT: addi.w $a0, $a0, -24
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctlz_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: andi $a0, $a0, 255
+; LA64-NEXT: clz.d $a0, $a0
+; LA64-NEXT: addi.d $a0, $a0, -56
+; LA64-NEXT: ret
+ %tmp = call i8 @llvm.ctlz.i8(i8 %a, i1 false)
+ ret i8 %tmp
+}
+
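For the i8 case, the value is zero-extended, counted at the wider width, and the count of extra leading zeros (24 for a 32-bit count, 56 for a 64-bit one) is subtracted back off. C++ sketch of the LA32 sequence (illustrative):

  #include <bit>
  #include <cstdint>

  unsigned ctlz8(uint8_t x) {
    // andi 255 / clz.w / addi -24 from the LA32 output above:
    return std::countl_zero(static_cast<uint32_t>(x)) - 24;
  }
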
+define i16 @test_ctlz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_ctlz_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: clz.w $a0, $a0
+; LA32-NEXT: addi.w $a0, $a0, -16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctlz_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: clz.d $a0, $a0
+; LA64-NEXT: addi.d $a0, $a0, -48
+; LA64-NEXT: ret
+ %tmp = call i16 @llvm.ctlz.i16(i16 %a, i1 false)
+ ret i16 %tmp
+}
+
+define i32 @test_ctlz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_ctlz_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: clz.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctlz_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: clz.w $a0, $a0
+; LA64-NEXT: ret
+ %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
+ ret i32 %tmp
+}
+
+define i64 @test_ctlz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_ctlz_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: sltu $a2, $zero, $a1
+; LA32-NEXT: clz.w $a1, $a1
+; LA32-NEXT: maskeqz $a1, $a1, $a2
+; LA32-NEXT: clz.w $a0, $a0
+; LA32-NEXT: addi.w $a0, $a0, 32
+; LA32-NEXT: masknez $a0, $a0, $a2
+; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctlz_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: clz.d $a0, $a0
+; LA64-NEXT: ret
+ %tmp = call i64 @llvm.ctlz.i64(i64 %a, i1 false)
+ ret i64 %tmp
+}
+
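On LA32 an i64 count is split across the two register halves and combined branchlessly with sltu/maskeqz/masknez. C++ model of that select (illustrative):

  #include <bit>
  #include <cstdint>

  uint32_t ctlz64_on_la32(uint32_t lo, uint32_t hi) {
    uint32_t m = hi != 0 ? ~0u : 0u;                      // sltu-derived mask
    uint32_t from_hi = std::countl_zero(hi) & m;          // maskeqz
    uint32_t from_lo = (std::countl_zero(lo) + 32u) & ~m; // masknez
    return from_hi | from_lo;                             // or
  }
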
+define i8 @test_not_ctlz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ori $a1, $zero, 255
+; LA32-NEXT: andn $a0, $a1, $a0
+; LA32-NEXT: clz.w $a0, $a0
+; LA32-NEXT: addi.w $a0, $a0, -24
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_ctlz_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ori $a1, $zero, 255
+; LA64-NEXT: andn $a0, $a1, $a0
+; LA64-NEXT: clz.d $a0, $a0
+; LA64-NEXT: addi.d $a0, $a0, -56
+; LA64-NEXT: ret
+ %neg = xor i8 %a, -1
+ %tmp = call i8 @llvm.ctlz.i8(i8 %neg, i1 false)
+ ret i8 %tmp
+}
+
+define i16 @test_not_ctlz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: nor $a0, $a0, $zero
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: clz.w $a0, $a0
+; LA32-NEXT: addi.w $a0, $a0, -16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_ctlz_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: nor $a0, $a0, $zero
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: clz.d $a0, $a0
+; LA64-NEXT: addi.d $a0, $a0, -48
+; LA64-NEXT: ret
+ %neg = xor i16 %a, -1
+ %tmp = call i16 @llvm.ctlz.i16(i16 %neg, i1 false)
+ ret i16 %tmp
+}
+
+define i32 @test_not_ctlz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: clo.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_ctlz_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: clo.w $a0, $a0
+; LA64-NEXT: ret
+ %neg = xor i32 %a, -1
+ %tmp = call i32 @llvm.ctlz.i32(i32 %neg, i1 false)
+ ret i32 %tmp
+}
+
+define i64 @test_not_ctlz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_not_ctlz_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: nor $a2, $a1, $zero
+; LA32-NEXT: sltu $a2, $zero, $a2
+; LA32-NEXT: clo.w $a0, $a0
+; LA32-NEXT: addi.w $a0, $a0, 32
+; LA32-NEXT: masknez $a0, $a0, $a2
+; LA32-NEXT: clo.w $a1, $a1
+; LA32-NEXT: maskeqz $a1, $a1, $a2
+; LA32-NEXT: or $a0, $a1, $a0
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_ctlz_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: clo.d $a0, $a0
+; LA64-NEXT: ret
+ %neg = xor i64 %a, -1
+ %tmp = call i64 @llvm.ctlz.i64(i64 %neg, i1 false)
+ ret i64 %tmp
+}
+
+define i8 @test_ctpop_i8(i8 %a) nounwind {
+; LA32-LABEL: test_ctpop_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: srli.w $a1, $a0, 1
+; LA32-NEXT: andi $a1, $a1, 85
+; LA32-NEXT: sub.w $a0, $a0, $a1
+; LA32-NEXT: andi $a1, $a0, 51
+; LA32-NEXT: srli.w $a0, $a0, 2
+; LA32-NEXT: andi $a0, $a0, 51
+; LA32-NEXT: add.w $a0, $a1, $a0
+; LA32-NEXT: srli.w $a1, $a0, 4
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: andi $a0, $a0, 15
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctpop_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: srli.d $a1, $a0, 1
+; LA64-NEXT: andi $a1, $a1, 85
+; LA64-NEXT: sub.d $a0, $a0, $a1
+; LA64-NEXT: andi $a1, $a0, 51
+; LA64-NEXT: srli.d $a0, $a0, 2
+; LA64-NEXT: andi $a0, $a0, 51
+; LA64-NEXT: add.d $a0, $a1, $a0
+; LA64-NEXT: srli.d $a1, $a0, 4
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: andi $a0, $a0, 15
+; LA64-NEXT: ret
+ %1 = call i8 @llvm.ctpop.i8(i8 %a)
+ ret i8 %1
+}
+
+define i16 @test_ctpop_i16(i16 %a) nounwind {
+; LA32-LABEL: test_ctpop_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: lu12i.w $a1, 5
+; LA32-NEXT: ori $a1, $a1, 1365
+; LA32-NEXT: srli.w $a2, $a0, 1
+; LA32-NEXT: and $a1, $a2, $a1
+; LA32-NEXT: sub.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a1, 3
+; LA32-NEXT: ori $a1, $a1, 819
+; LA32-NEXT: and $a2, $a0, $a1
+; LA32-NEXT: srli.w $a0, $a0, 2
+; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: add.w $a0, $a2, $a0
+; LA32-NEXT: srli.w $a1, $a0, 4
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8
+; LA32-NEXT: andi $a0, $a0, 15
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctpop_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: lu12i.w $a1, 5
+; LA64-NEXT: ori $a1, $a1, 1365
+; LA64-NEXT: srli.d $a2, $a0, 1
+; LA64-NEXT: and $a1, $a2, $a1
+; LA64-NEXT: sub.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 3
+; LA64-NEXT: ori $a1, $a1, 819
+; LA64-NEXT: and $a2, $a0, $a1
+; LA64-NEXT: srli.d $a0, $a0, 2
+; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: srli.d $a1, $a0, 4
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a1, $a0, 11, 8
+; LA64-NEXT: andi $a0, $a0, 15
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: ret
+ %1 = call i16 @llvm.ctpop.i16(i16 %a)
+ ret i16 %1
+}
+
+define i32 @test_ctpop_i32(i32 %a) nounwind {
+; LA32-LABEL: test_ctpop_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: lu12i.w $a1, 349525
+; LA32-NEXT: ori $a1, $a1, 1365
+; LA32-NEXT: srli.w $a2, $a0, 1
+; LA32-NEXT: and $a1, $a2, $a1
+; LA32-NEXT: sub.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a1, 209715
+; LA32-NEXT: ori $a1, $a1, 819
+; LA32-NEXT: and $a2, $a0, $a1
+; LA32-NEXT: srli.w $a0, $a0, 2
+; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: add.w $a0, $a2, $a0
+; LA32-NEXT: srli.w $a1, $a0, 4
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a1, 61680
+; LA32-NEXT: ori $a1, $a1, 3855
+; LA32-NEXT: and $a0, $a0, $a1
+; LA32-NEXT: lu12i.w $a1, 4112
+; LA32-NEXT: ori $a1, $a1, 257
+; LA32-NEXT: mul.w $a0, $a0, $a1
+; LA32-NEXT: srli.w $a0, $a0, 24
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctpop_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: lu12i.w $a1, 349525
+; LA64-NEXT: ori $a1, $a1, 1365
+; LA64-NEXT: srli.d $a2, $a0, 1
+; LA64-NEXT: and $a1, $a2, $a1
+; LA64-NEXT: sub.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 209715
+; LA64-NEXT: ori $a1, $a1, 819
+; LA64-NEXT: and $a2, $a0, $a1
+; LA64-NEXT: srli.d $a0, $a0, 2
+; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: srli.d $a1, $a0, 4
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 61680
+; LA64-NEXT: ori $a1, $a1, 3855
+; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 4112
+; LA64-NEXT: ori $a1, $a1, 257
+; LA64-NEXT: mul.d $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
+; LA64-NEXT: ret
+ %1 = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %1
+}
+
+define i64 @test_ctpop_i64(i64 %a) nounwind {
+; LA32-LABEL: test_ctpop_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: lu12i.w $a2, 349525
+; LA32-NEXT: ori $a2, $a2, 1365
+; LA32-NEXT: srli.w $a3, $a0, 1
+; LA32-NEXT: and $a3, $a3, $a2
+; LA32-NEXT: sub.w $a0, $a0, $a3
+; LA32-NEXT: lu12i.w $a3, 209715
+; LA32-NEXT: ori $a3, $a3, 819
+; LA32-NEXT: and $a4, $a0, $a3
+; LA32-NEXT: srli.w $a0, $a0, 2
+; LA32-NEXT: and $a0, $a0, $a3
+; LA32-NEXT: add.w $a0, $a4, $a0
+; LA32-NEXT: srli.w $a4, $a1, 1
+; LA32-NEXT: and $a2, $a4, $a2
+; LA32-NEXT: sub.w $a1, $a1, $a2
+; LA32-NEXT: srli.w $a2, $a0, 4
+; LA32-NEXT: add.w $a0, $a0, $a2
+; LA32-NEXT: and $a2, $a1, $a3
+; LA32-NEXT: srli.w $a1, $a1, 2
+; LA32-NEXT: and $a1, $a1, $a3
+; LA32-NEXT: add.w $a1, $a2, $a1
+; LA32-NEXT: srli.w $a2, $a1, 4
+; LA32-NEXT: add.w $a1, $a1, $a2
+; LA32-NEXT: lu12i.w $a2, 61680
+; LA32-NEXT: ori $a2, $a2, 3855
+; LA32-NEXT: and $a1, $a1, $a2
+; LA32-NEXT: and $a0, $a0, $a2
+; LA32-NEXT: lu12i.w $a2, 4112
+; LA32-NEXT: ori $a2, $a2, 257
+; LA32-NEXT: mul.w $a0, $a0, $a2
+; LA32-NEXT: mul.w $a1, $a1, $a2
+; LA32-NEXT: srli.w $a1, $a1, 24
+; LA32-NEXT: srli.w $a0, $a0, 24
+; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_ctpop_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: lu12i.w $a1, 349525
+; LA64-NEXT: ori $a1, $a1, 1365
+; LA64-NEXT: lu32i.d $a1, 349525
+; LA64-NEXT: lu52i.d $a1, $a1, 1365
+; LA64-NEXT: srli.d $a2, $a0, 1
+; LA64-NEXT: and $a1, $a2, $a1
+; LA64-NEXT: sub.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 209715
+; LA64-NEXT: ori $a1, $a1, 819
+; LA64-NEXT: lu32i.d $a1, 209715
+; LA64-NEXT: lu52i.d $a1, $a1, 819
+; LA64-NEXT: and $a2, $a0, $a1
+; LA64-NEXT: srli.d $a0, $a0, 2
+; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: add.d $a0, $a2, $a0
+; LA64-NEXT: srli.d $a1, $a0, 4
+; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 61680
+; LA64-NEXT: ori $a1, $a1, 3855
+; LA64-NEXT: lu32i.d $a1, -61681
+; LA64-NEXT: lu52i.d $a1, $a1, 240
+; LA64-NEXT: and $a0, $a0, $a1
+; LA64-NEXT: lu12i.w $a1, 4112
+; LA64-NEXT: ori $a1, $a1, 257
+; LA64-NEXT: lu32i.d $a1, 65793
+; LA64-NEXT: lu52i.d $a1, $a1, 16
+; LA64-NEXT: mul.d $a0, $a0, $a1
+; LA64-NEXT: srli.d $a0, $a0, 56
+; LA64-NEXT: ret
+ %1 = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %1
+}
+
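On LA32 the i64 popcount is simply the sum of two 32-bit popcounts, which is why the sequence above runs the same mask/add pipeline twice before adding the results. Equivalent C++20 (illustrative):

  #include <bit>
  #include <cstdint>

  unsigned ctpop64_on_la32(uint32_t lo, uint32_t hi) {
    return std::popcount(lo) + std::popcount(hi);
  }
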
+define i8 @test_cttz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_cttz_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ori $a0, $a0, 256
+; LA32-NEXT: ctz.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_cttz_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ori $a0, $a0, 256
+; LA64-NEXT: ctz.d $a0, $a0
+; LA64-NEXT: ret
+ %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false)
+ ret i8 %tmp
+}
+
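For narrow cttz the backend plants a sentinel bit just above the value's width (ori 256 here, lu12i.w 16 in the i16 case below), which bounds the count for a zero input and removes any zero check. C++ sketch (illustrative):

  #include <bit>
  #include <cstdint>

  unsigned cttz8(uint8_t x) {
    // Bit 8 guarantees a result of at most 8, even for x == 0:
    return std::countr_zero(static_cast<uint32_t>(x) | 0x100u);
  }
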
+define i16 @test_cttz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_cttz_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: lu12i.w $a1, 16
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: ctz.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_cttz_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: lu12i.w $a1, 16
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: ctz.d $a0, $a0
+; LA64-NEXT: ret
+ %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false)
+ ret i16 %tmp
+}
+
+define i32 @test_cttz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_cttz_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: ctz.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_cttz_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: ctz.w $a0, $a0
+; LA64-NEXT: ret
+ %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+ ret i32 %tmp
+}
+
+define i64 @test_cttz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_cttz_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: sltu $a2, $zero, $a0
+; LA32-NEXT: ctz.w $a0, $a0
+; LA32-NEXT: maskeqz $a0, $a0, $a2
+; LA32-NEXT: ctz.w $a1, $a1
+; LA32-NEXT: addi.w $a1, $a1, 32
+; LA32-NEXT: masknez $a1, $a1, $a2
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_cttz_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: ctz.d $a0, $a0
+; LA64-NEXT: ret
+ %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+ ret i64 %tmp
+}
+
+define i8 @test_not_cttz_i8(i8 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i8:
+; LA32: # %bb.0:
+; LA32-NEXT: ori $a1, $zero, 256
+; LA32-NEXT: orn $a0, $a1, $a0
+; LA32-NEXT: ctz.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_cttz_i8:
+; LA64: # %bb.0:
+; LA64-NEXT: ori $a1, $zero, 256
+; LA64-NEXT: orn $a0, $a1, $a0
+; LA64-NEXT: ctz.d $a0, $a0
+; LA64-NEXT: ret
+ %neg = xor i8 %a, -1
+ %tmp = call i8 @llvm.cttz.i8(i8 %neg, i1 false)
+ ret i8 %tmp
+}
+
+define i16 @test_not_cttz_i16(i16 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i16:
+; LA32: # %bb.0:
+; LA32-NEXT: lu12i.w $a1, 16
+; LA32-NEXT: orn $a0, $a1, $a0
+; LA32-NEXT: ctz.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_cttz_i16:
+; LA64: # %bb.0:
+; LA64-NEXT: lu12i.w $a1, 16
+; LA64-NEXT: orn $a0, $a1, $a0
+; LA64-NEXT: ctz.d $a0, $a0
+; LA64-NEXT: ret
+ %neg = xor i16 %a, -1
+ %tmp = call i16 @llvm.cttz.i16(i16 %neg, i1 false)
+ ret i16 %tmp
+}
+
+define i32 @test_not_cttz_i32(i32 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: cto.w $a0, $a0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_cttz_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: cto.w $a0, $a0
+; LA64-NEXT: ret
+ %neg = xor i32 %a, -1
+ %tmp = call i32 @llvm.cttz.i32(i32 %neg, i1 false)
+ ret i32 %tmp
+}
+
+define i64 @test_not_cttz_i64(i64 %a) nounwind {
+; LA32-LABEL: test_not_cttz_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: nor $a2, $a0, $zero
+; LA32-NEXT: sltu $a2, $zero, $a2
+; LA32-NEXT: cto.w $a1, $a1
+; LA32-NEXT: addi.w $a1, $a1, 32
+; LA32-NEXT: masknez $a1, $a1, $a2
+; LA32-NEXT: cto.w $a0, $a0
+; LA32-NEXT: maskeqz $a0, $a0, $a2
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: move $a1, $zero
+; LA32-NEXT: ret
+;
+; LA64-LABEL: test_not_cttz_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: cto.d $a0, $a0
+; LA64-NEXT: ret
+ %neg = xor i64 %a, -1
+ %tmp = call i64 @llvm.cttz.i64(i64 %neg, i1 false)
+ ret i64 %tmp
+}