[llvm-branch-commits] [LoongArch] Legalize ISD::CTPOP for GRLenVT type with LSX (PR #106941)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Sep 1 23:19:08 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: wanglei (wangleiat)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/106941.diff
3 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+41)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+4)
- (modified) llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll (+41-148)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 95c1b150722f64..0e17ce7ea02bb4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -283,6 +283,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETUGT},
VT, Expand);
}
+ setOperationAction(ISD::CTPOP, GRLenVT, Legal);
}
// Set operations for 'LASX' feature.
@@ -4488,6 +4489,44 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ const LoongArchSubtarget &Subtarget) {
+ assert(Subtarget.hasExtLSX());
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
+ DebugLoc DL = MI.getDebugLoc();
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register ScratchReg1 = MRI.createVirtualRegister(RC);
+ Register ScratchReg2 = MRI.createVirtualRegister(RC);
+ Register ScratchReg3 = MRI.createVirtualRegister(RC);
+
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
+ BuildMI(*BB, MI, DL,
+ TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
+ : LoongArch::VINSGR2VR_W),
+ ScratchReg2)
+ .addReg(ScratchReg1)
+ .addReg(Src)
+ .addImm(0);
+ BuildMI(
+ *BB, MI, DL,
+ TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
+ ScratchReg3)
+ .addReg(ScratchReg2);
+ BuildMI(*BB, MI, DL,
+ TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
+ : LoongArch::VPICKVE2GR_W),
+ Dst)
+ .addReg(ScratchReg3)
+ .addImm(0);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -4546,6 +4585,8 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
case LoongArch::PseudoXVINSGR2VR_B:
case LoongArch::PseudoXVINSGR2VR_H:
return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
+ case LoongArch::PseudoCTPOP:
+ return emitPseudoCTPOP(MI, BB, Subtarget);
}
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 659ba38c695d33..e7ac9f3bd04cbf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1238,6 +1238,10 @@ def PseudoVBZ_W : VecCond<loongarch_vall_zero, v4i32>;
def PseudoVBZ_D : VecCond<loongarch_vall_zero, v2i64>;
def PseudoVBZ : VecCond<loongarch_vany_zero, v16i8>;
+let usesCustomInserter = 1 in
+def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+ [(set GPR:$rd, (ctpop GPR:$rj))]>;
+
} // Predicates = [HasExtLSX]
multiclass PatVr<SDPatternOperator OpNode, string Inst> {
diff --git a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
index a5cffb29eec614..c01f3cdb405682 100644
--- a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
+++ b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
@@ -10,30 +10,20 @@ declare i64 @llvm.ctpop.i64(i64)
define i8 @test_ctpop_i8(i8 %a) nounwind {
; LA32-LABEL: test_ctpop_i8:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a0, 1
-; LA32-NEXT: andi $a1, $a1, 85
-; LA32-NEXT: sub.w $a0, $a0, $a1
-; LA32-NEXT: andi $a1, $a0, 51
-; LA32-NEXT: srli.w $a0, $a0, 2
-; LA32-NEXT: andi $a0, $a0, 51
-; LA32-NEXT: add.w $a0, $a1, $a0
-; LA32-NEXT: srli.w $a1, $a0, 4
-; LA32-NEXT: add.w $a0, $a0, $a1
-; LA32-NEXT: andi $a0, $a0, 15
+; LA32-NEXT: andi $a0, $a0, 255
+; LA32-NEXT: vldi $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vpcnt.w $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: test_ctpop_i8:
; LA64: # %bb.0:
-; LA64-NEXT: srli.d $a1, $a0, 1
-; LA64-NEXT: andi $a1, $a1, 85
-; LA64-NEXT: sub.d $a0, $a0, $a1
-; LA64-NEXT: andi $a1, $a0, 51
-; LA64-NEXT: srli.d $a0, $a0, 2
-; LA64-NEXT: andi $a0, $a0, 51
-; LA64-NEXT: add.d $a0, $a1, $a0
-; LA64-NEXT: srli.d $a1, $a0, 4
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: andi $a0, $a0, 15
+; LA64-NEXT: andi $a0, $a0, 255
+; LA64-NEXT: vldi $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpcnt.d $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i8 @llvm.ctpop.i8(i8 %a)
ret i8 %1
@@ -42,42 +32,20 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
define i16 @test_ctpop_i16(i16 %a) nounwind {
; LA32-LABEL: test_ctpop_i16:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a0, 1
-; LA32-NEXT: lu12i.w $a2, 5
-; LA32-NEXT: ori $a2, $a2, 1365
-; LA32-NEXT: and $a1, $a1, $a2
-; LA32-NEXT: sub.w $a0, $a0, $a1
-; LA32-NEXT: lu12i.w $a1, 3
-; LA32-NEXT: ori $a1, $a1, 819
-; LA32-NEXT: and $a2, $a0, $a1
-; LA32-NEXT: srli.w $a0, $a0, 2
-; LA32-NEXT: and $a0, $a0, $a1
-; LA32-NEXT: add.w $a0, $a2, $a0
-; LA32-NEXT: srli.w $a1, $a0, 4
-; LA32-NEXT: add.w $a0, $a0, $a1
-; LA32-NEXT: bstrpick.w $a1, $a0, 11, 8
-; LA32-NEXT: andi $a0, $a0, 15
-; LA32-NEXT: add.w $a0, $a0, $a1
+; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT: vldi $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vpcnt.w $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: test_ctpop_i16:
; LA64: # %bb.0:
-; LA64-NEXT: srli.d $a1, $a0, 1
-; LA64-NEXT: lu12i.w $a2, 5
-; LA64-NEXT: ori $a2, $a2, 1365
-; LA64-NEXT: and $a1, $a1, $a2
-; LA64-NEXT: sub.d $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 3
-; LA64-NEXT: ori $a1, $a1, 819
-; LA64-NEXT: and $a2, $a0, $a1
-; LA64-NEXT: srli.d $a0, $a0, 2
-; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: srli.d $a1, $a0, 4
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: bstrpick.d $a1, $a0, 11, 8
-; LA64-NEXT: andi $a0, $a0, 15
-; LA64-NEXT: add.d $a0, $a0, $a1
+; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT: vldi $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpcnt.d $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i16 @llvm.ctpop.i16(i16 %a)
ret i16 %1
@@ -86,50 +54,19 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
define i32 @test_ctpop_i32(i32 %a) nounwind {
; LA32-LABEL: test_ctpop_i32:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a1, $a0, 1
-; LA32-NEXT: lu12i.w $a2, 349525
-; LA32-NEXT: ori $a2, $a2, 1365
-; LA32-NEXT: and $a1, $a1, $a2
-; LA32-NEXT: sub.w $a0, $a0, $a1
-; LA32-NEXT: lu12i.w $a1, 209715
-; LA32-NEXT: ori $a1, $a1, 819
-; LA32-NEXT: and $a2, $a0, $a1
-; LA32-NEXT: srli.w $a0, $a0, 2
-; LA32-NEXT: and $a0, $a0, $a1
-; LA32-NEXT: add.w $a0, $a2, $a0
-; LA32-NEXT: srli.w $a1, $a0, 4
-; LA32-NEXT: add.w $a0, $a0, $a1
-; LA32-NEXT: lu12i.w $a1, 61680
-; LA32-NEXT: ori $a1, $a1, 3855
-; LA32-NEXT: and $a0, $a0, $a1
-; LA32-NEXT: lu12i.w $a1, 4112
-; LA32-NEXT: ori $a1, $a1, 257
-; LA32-NEXT: mul.w $a0, $a0, $a1
-; LA32-NEXT: srli.w $a0, $a0, 24
+; LA32-NEXT: vldi $vr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vpcnt.w $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32-NEXT: ret
;
; LA64-LABEL: test_ctpop_i32:
; LA64: # %bb.0:
-; LA64-NEXT: srli.d $a1, $a0, 1
-; LA64-NEXT: lu12i.w $a2, 349525
-; LA64-NEXT: ori $a2, $a2, 1365
-; LA64-NEXT: and $a1, $a1, $a2
-; LA64-NEXT: sub.d $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 209715
-; LA64-NEXT: ori $a1, $a1, 819
-; LA64-NEXT: and $a2, $a0, $a1
-; LA64-NEXT: srli.d $a0, $a0, 2
-; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: srli.d $a1, $a0, 4
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 61680
-; LA64-NEXT: ori $a1, $a1, 3855
-; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 4112
-; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: bstrpick.d $a0, $a0, 31, 24
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT: vldi $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpcnt.d $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i32 @llvm.ctpop.i32(i32 %a)
ret i32 %1
@@ -138,68 +75,24 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
define i64 @test_ctpop_i64(i64 %a) nounwind {
; LA32-LABEL: test_ctpop_i64:
; LA32: # %bb.0:
-; LA32-NEXT: srli.w $a2, $a1, 1
-; LA32-NEXT: lu12i.w $a3, 349525
-; LA32-NEXT: ori $a3, $a3, 1365
-; LA32-NEXT: and $a2, $a2, $a3
-; LA32-NEXT: sub.w $a1, $a1, $a2
-; LA32-NEXT: lu12i.w $a2, 209715
-; LA32-NEXT: ori $a2, $a2, 819
-; LA32-NEXT: and $a4, $a1, $a2
-; LA32-NEXT: srli.w $a1, $a1, 2
-; LA32-NEXT: and $a1, $a1, $a2
-; LA32-NEXT: add.w $a1, $a4, $a1
-; LA32-NEXT: srli.w $a4, $a1, 4
-; LA32-NEXT: add.w $a1, $a1, $a4
-; LA32-NEXT: lu12i.w $a4, 61680
-; LA32-NEXT: ori $a4, $a4, 3855
-; LA32-NEXT: and $a1, $a1, $a4
-; LA32-NEXT: lu12i.w $a5, 4112
-; LA32-NEXT: ori $a5, $a5, 257
-; LA32-NEXT: mul.w $a1, $a1, $a5
-; LA32-NEXT: srli.w $a1, $a1, 24
-; LA32-NEXT: srli.w $a6, $a0, 1
-; LA32-NEXT: and $a3, $a6, $a3
-; LA32-NEXT: sub.w $a0, $a0, $a3
-; LA32-NEXT: and $a3, $a0, $a2
-; LA32-NEXT: srli.w $a0, $a0, 2
-; LA32-NEXT: and $a0, $a0, $a2
-; LA32-NEXT: add.w $a0, $a3, $a0
-; LA32-NEXT: srli.w $a2, $a0, 4
-; LA32-NEXT: add.w $a0, $a0, $a2
-; LA32-NEXT: and $a0, $a0, $a4
-; LA32-NEXT: mul.w $a0, $a0, $a5
-; LA32-NEXT: srli.w $a0, $a0, 24
+; LA32-NEXT: vldi $vr0, 0
+; LA32-NEXT: vldi $vr1, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA32-NEXT: vpcnt.w $vr1, $vr1
+; LA32-NEXT: vpickve2gr.w $a1, $vr1, 0
+; LA32-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT: vpcnt.w $vr0, $vr0
+; LA32-NEXT: vpickve2gr.w $a0, $vr0, 0
; LA32-NEXT: add.w $a0, $a0, $a1
; LA32-NEXT: move $a1, $zero
; LA32-NEXT: ret
;
; LA64-LABEL: test_ctpop_i64:
; LA64: # %bb.0:
-; LA64-NEXT: srli.d $a1, $a0, 1
-; LA64-NEXT: lu12i.w $a2, 349525
-; LA64-NEXT: ori $a2, $a2, 1365
-; LA64-NEXT: bstrins.d $a2, $a2, 62, 32
-; LA64-NEXT: and $a1, $a1, $a2
-; LA64-NEXT: sub.d $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 209715
-; LA64-NEXT: ori $a1, $a1, 819
-; LA64-NEXT: bstrins.d $a1, $a1, 61, 32
-; LA64-NEXT: and $a2, $a0, $a1
-; LA64-NEXT: srli.d $a0, $a0, 2
-; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: add.d $a0, $a2, $a0
-; LA64-NEXT: srli.d $a1, $a0, 4
-; LA64-NEXT: add.d $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 61680
-; LA64-NEXT: ori $a1, $a1, 3855
-; LA64-NEXT: bstrins.d $a1, $a1, 59, 32
-; LA64-NEXT: and $a0, $a0, $a1
-; LA64-NEXT: lu12i.w $a1, 4112
-; LA64-NEXT: ori $a1, $a1, 257
-; LA64-NEXT: bstrins.d $a1, $a1, 56, 32
-; LA64-NEXT: mul.d $a0, $a0, $a1
-; LA64-NEXT: srli.d $a0, $a0, 56
+; LA64-NEXT: vldi $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT: vpcnt.d $vr0, $vr0
+; LA64-NEXT: vpickve2gr.d $a0, $vr0, 0
; LA64-NEXT: ret
%1 = call i64 @llvm.ctpop.i64(i64 %a)
ret i64 %1
``````````
</details>
https://github.com/llvm/llvm-project/pull/106941
More information about the llvm-branch-commits mailing list