[llvm-branch-commits] [LoongArch] Legalize ISD::CTPOP for GRLenVT type with LSX (PR #106941)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Sep 1 23:19:08 PDT 2024


llvmbot wrote:


@llvm/pr-subscribers-backend-loongarch

Author: wanglei (wangleiat)

Changes:



---
Full diff: https://github.com/llvm/llvm-project/pull/106941.diff


3 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+41) 
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+4) 
- (modified) llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll (+41-148) 


``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 95c1b150722f64..0e17ce7ea02bb4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -283,6 +283,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
                          ISD::SETUGE, ISD::SETUGT},
                         VT, Expand);
     }
+    setOperationAction(ISD::CTPOP, GRLenVT, Legal);
   }
 
   // Set operations for 'LASX' feature.
@@ -4488,6 +4489,44 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
   return BB;
 }
 
+static MachineBasicBlock *emitPseudoCTPOP(MachineInstr &MI,
+                                          MachineBasicBlock *BB,
+                                          const LoongArchSubtarget &Subtarget) {
+  assert(Subtarget.hasExtLSX());
+  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+  const TargetRegisterClass *RC = &LoongArch::LSX128RegClass;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+  Register Dst = MI.getOperand(0).getReg();
+  Register Src = MI.getOperand(1).getReg();
+  Register ScratchReg1 = MRI.createVirtualRegister(RC);
+  Register ScratchReg2 = MRI.createVirtualRegister(RC);
+  Register ScratchReg3 = MRI.createVirtualRegister(RC);
+
+  BuildMI(*BB, MI, DL, TII->get(LoongArch::VLDI), ScratchReg1).addImm(0);
+  BuildMI(*BB, MI, DL,
+          TII->get(Subtarget.is64Bit() ? LoongArch::VINSGR2VR_D
+                                       : LoongArch::VINSGR2VR_W),
+          ScratchReg2)
+      .addReg(ScratchReg1)
+      .addReg(Src)
+      .addImm(0);
+  BuildMI(
+      *BB, MI, DL,
+      TII->get(Subtarget.is64Bit() ? LoongArch::VPCNT_D : LoongArch::VPCNT_W),
+      ScratchReg3)
+      .addReg(ScratchReg2);
+  BuildMI(*BB, MI, DL,
+          TII->get(Subtarget.is64Bit() ? LoongArch::VPICKVE2GR_D
+                                       : LoongArch::VPICKVE2GR_W),
+          Dst)
+      .addReg(ScratchReg3)
+      .addImm(0);
+
+  MI.eraseFromParent();
+  return BB;
+}
+
 MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
     MachineInstr &MI, MachineBasicBlock *BB) const {
   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
@@ -4546,6 +4585,8 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
   case LoongArch::PseudoXVINSGR2VR_B:
   case LoongArch::PseudoXVINSGR2VR_H:
     return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
+  case LoongArch::PseudoCTPOP:
+    return emitPseudoCTPOP(MI, BB, Subtarget);
   }
 }
 
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 659ba38c695d33..e7ac9f3bd04cbf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1238,6 +1238,10 @@ def PseudoVBZ_W : VecCond<loongarch_vall_zero, v4i32>;
 def PseudoVBZ_D : VecCond<loongarch_vall_zero, v2i64>;
 def PseudoVBZ : VecCond<loongarch_vany_zero, v16i8>;
 
+let usesCustomInserter = 1 in
+def PseudoCTPOP : Pseudo<(outs GPR:$rd), (ins GPR:$rj),
+                         [(set GPR:$rd, (ctpop GPR:$rj))]>;
+
 } // Predicates = [HasExtLSX]
 
 multiclass PatVr<SDPatternOperator OpNode, string Inst> {
diff --git a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
index a5cffb29eec614..c01f3cdb405682 100644
--- a/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
+++ b/llvm/test/CodeGen/LoongArch/ctpop-with-lsx.ll
@@ -10,30 +10,20 @@ declare i64 @llvm.ctpop.i64(i64)
 define i8 @test_ctpop_i8(i8 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a0, 1
-; LA32-NEXT:    andi $a1, $a1, 85
-; LA32-NEXT:    sub.w $a0, $a0, $a1
-; LA32-NEXT:    andi $a1, $a0, 51
-; LA32-NEXT:    srli.w $a0, $a0, 2
-; LA32-NEXT:    andi $a0, $a0, 51
-; LA32-NEXT:    add.w $a0, $a1, $a0
-; LA32-NEXT:    srli.w $a1, $a0, 4
-; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    andi $a0, $a0, 15
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    vldi $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT:    vpcnt.w $vr0, $vr0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_ctpop_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    andi $a1, $a1, 85
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    andi $a1, $a0, 51
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    andi $a0, $a0, 51
-; LA64-NEXT:    add.d $a0, $a1, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    andi $a0, $a0, 15
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i8 @llvm.ctpop.i8(i8 %a)
   ret i8 %1
@@ -42,42 +32,20 @@ define i8 @test_ctpop_i8(i8 %a) nounwind {
 define i16 @test_ctpop_i16(i16 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a0, 1
-; LA32-NEXT:    lu12i.w $a2, 5
-; LA32-NEXT:    ori $a2, $a2, 1365
-; LA32-NEXT:    and $a1, $a1, $a2
-; LA32-NEXT:    sub.w $a0, $a0, $a1
-; LA32-NEXT:    lu12i.w $a1, 3
-; LA32-NEXT:    ori $a1, $a1, 819
-; LA32-NEXT:    and $a2, $a0, $a1
-; LA32-NEXT:    srli.w $a0, $a0, 2
-; LA32-NEXT:    and $a0, $a0, $a1
-; LA32-NEXT:    add.w $a0, $a2, $a0
-; LA32-NEXT:    srli.w $a1, $a0, 4
-; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    bstrpick.w $a1, $a0, 11, 8
-; LA32-NEXT:    andi $a0, $a0, 15
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vldi $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT:    vpcnt.w $vr0, $vr0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_ctpop_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 5
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    and $a1, $a1, $a2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 3
-; LA64-NEXT:    ori $a1, $a1, 819
-; LA64-NEXT:    and $a2, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    bstrpick.d $a1, $a0, 11, 8
-; LA64-NEXT:    andi $a0, $a0, 15
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i16 @llvm.ctpop.i16(i16 %a)
   ret i16 %1
@@ -86,50 +54,19 @@ define i16 @test_ctpop_i16(i16 %a) nounwind {
 define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i32:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a1, $a0, 1
-; LA32-NEXT:    lu12i.w $a2, 349525
-; LA32-NEXT:    ori $a2, $a2, 1365
-; LA32-NEXT:    and $a1, $a1, $a2
-; LA32-NEXT:    sub.w $a0, $a0, $a1
-; LA32-NEXT:    lu12i.w $a1, 209715
-; LA32-NEXT:    ori $a1, $a1, 819
-; LA32-NEXT:    and $a2, $a0, $a1
-; LA32-NEXT:    srli.w $a0, $a0, 2
-; LA32-NEXT:    and $a0, $a0, $a1
-; LA32-NEXT:    add.w $a0, $a2, $a0
-; LA32-NEXT:    srli.w $a1, $a0, 4
-; LA32-NEXT:    add.w $a0, $a0, $a1
-; LA32-NEXT:    lu12i.w $a1, 61680
-; LA32-NEXT:    ori $a1, $a1, 3855
-; LA32-NEXT:    and $a0, $a0, $a1
-; LA32-NEXT:    lu12i.w $a1, 4112
-; LA32-NEXT:    ori $a1, $a1, 257
-; LA32-NEXT:    mul.w $a0, $a0, $a1
-; LA32-NEXT:    srli.w $a0, $a0, 24
+; LA32-NEXT:    vldi $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT:    vpcnt.w $vr0, $vr0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_ctpop_i32:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 349525
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    and $a1, $a1, $a2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 209715
-; LA64-NEXT:    ori $a1, $a1, 819
-; LA64-NEXT:    and $a2, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 61680
-; LA64-NEXT:    ori $a1, $a1, 3855
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 4112
-; LA64-NEXT:    ori $a1, $a1, 257
-; LA64-NEXT:    mul.d $a0, $a0, $a1
-; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 24
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %1
@@ -138,68 +75,24 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; LA32-LABEL: test_ctpop_i64:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    srli.w $a2, $a1, 1
-; LA32-NEXT:    lu12i.w $a3, 349525
-; LA32-NEXT:    ori $a3, $a3, 1365
-; LA32-NEXT:    and $a2, $a2, $a3
-; LA32-NEXT:    sub.w $a1, $a1, $a2
-; LA32-NEXT:    lu12i.w $a2, 209715
-; LA32-NEXT:    ori $a2, $a2, 819
-; LA32-NEXT:    and $a4, $a1, $a2
-; LA32-NEXT:    srli.w $a1, $a1, 2
-; LA32-NEXT:    and $a1, $a1, $a2
-; LA32-NEXT:    add.w $a1, $a4, $a1
-; LA32-NEXT:    srli.w $a4, $a1, 4
-; LA32-NEXT:    add.w $a1, $a1, $a4
-; LA32-NEXT:    lu12i.w $a4, 61680
-; LA32-NEXT:    ori $a4, $a4, 3855
-; LA32-NEXT:    and $a1, $a1, $a4
-; LA32-NEXT:    lu12i.w $a5, 4112
-; LA32-NEXT:    ori $a5, $a5, 257
-; LA32-NEXT:    mul.w $a1, $a1, $a5
-; LA32-NEXT:    srli.w $a1, $a1, 24
-; LA32-NEXT:    srli.w $a6, $a0, 1
-; LA32-NEXT:    and $a3, $a6, $a3
-; LA32-NEXT:    sub.w $a0, $a0, $a3
-; LA32-NEXT:    and $a3, $a0, $a2
-; LA32-NEXT:    srli.w $a0, $a0, 2
-; LA32-NEXT:    and $a0, $a0, $a2
-; LA32-NEXT:    add.w $a0, $a3, $a0
-; LA32-NEXT:    srli.w $a2, $a0, 4
-; LA32-NEXT:    add.w $a0, $a0, $a2
-; LA32-NEXT:    and $a0, $a0, $a4
-; LA32-NEXT:    mul.w $a0, $a0, $a5
-; LA32-NEXT:    srli.w $a0, $a0, 24
+; LA32-NEXT:    vldi $vr0, 0
+; LA32-NEXT:    vldi $vr1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a1, 0
+; LA32-NEXT:    vpcnt.w $vr1, $vr1
+; LA32-NEXT:    vpickve2gr.w $a1, $vr1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; LA32-NEXT:    vpcnt.w $vr0, $vr0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; LA32-NEXT:    add.w $a0, $a0, $a1
 ; LA32-NEXT:    move $a1, $zero
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test_ctpop_i64:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    srli.d $a1, $a0, 1
-; LA64-NEXT:    lu12i.w $a2, 349525
-; LA64-NEXT:    ori $a2, $a2, 1365
-; LA64-NEXT:    bstrins.d $a2, $a2, 62, 32
-; LA64-NEXT:    and $a1, $a1, $a2
-; LA64-NEXT:    sub.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 209715
-; LA64-NEXT:    ori $a1, $a1, 819
-; LA64-NEXT:    bstrins.d $a1, $a1, 61, 32
-; LA64-NEXT:    and $a2, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 2
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    add.d $a0, $a2, $a0
-; LA64-NEXT:    srli.d $a1, $a0, 4
-; LA64-NEXT:    add.d $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 61680
-; LA64-NEXT:    ori $a1, $a1, 3855
-; LA64-NEXT:    bstrins.d $a1, $a1, 59, 32
-; LA64-NEXT:    and $a0, $a0, $a1
-; LA64-NEXT:    lu12i.w $a1, 4112
-; LA64-NEXT:    ori $a1, $a1, 257
-; LA64-NEXT:    bstrins.d $a1, $a1, 56, 32
-; LA64-NEXT:    mul.d $a0, $a0, $a1
-; LA64-NEXT:    srli.d $a0, $a0, 56
+; LA64-NEXT:    vldi $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpcnt.d $vr0, $vr0
+; LA64-NEXT:    vpickve2gr.d $a0, $vr0, 0
 ; LA64-NEXT:    ret
   %1 = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %1

``````````
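
For local experimentation, a standalone IR reproducer in the spirit of the updated test is sketched below. It is not part of the patch, and the invocation flags are an assumption (the test's RUN lines are not shown in this diff), but the new path should only trigger when LSX is enabled, e.g. via `-mattr=+lsx`:

```llvm
; Hypothetical reproducer, not taken from the patch. Assumed invocation:
;   llc -mtriple=loongarch64 -mattr=+lsx ctpop64.ll -o -
declare i64 @llvm.ctpop.i64(i64)

define i64 @popcount64(i64 %a) nounwind {
  ; With this change, a scalar ctpop on GRLenVT matches PseudoCTPOP, whose
  ; custom inserter emits vldi, vinsgr2vr.d, vpcnt.d and vpickve2gr.d
  ; instead of the generic bit-twiddling expansion.
  %r = call i64 @llvm.ctpop.i64(i64 %a)
  ret i64 %r
}
```

Without `-mattr=+lsx`, the existing scalar expansion should still be used, since the operation is only marked Legal inside the LSX feature block.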



https://github.com/llvm/llvm-project/pull/106941


More information about the llvm-branch-commits mailing list