[llvm] [LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence (PR #115803)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 11 18:42:45 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

<details>
<summary>Changes</summary>

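Inspired by https://github.com/llvm/llvm-project/issues/101624.

Lower the `[x]vreplgr2vr.{b/h/w/d}` intrinsics to a new `LoongArchISD::VREPLGR2VR` node instead of a `BUILD_VECTOR`, so that a `[x]vreplgr2vr` followed by an `[x]vinsgr2vr` is selected as one replicate plus one insert rather than one insert per vector element.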

---
Full diff: https://github.com/llvm/llvm-project/pull/115803.diff


6 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+5-5) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1) 
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+9) 
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+12) 
- (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll (+2-10) 
- (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll (+4-26) 


``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index afc016c84162e7..4d77912b9ed54f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
   case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
   case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
   case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
-  case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
-    EVT ResTy = N->getValueType(0);
-    SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
-    return DAG.getBuildVector(ResTy, DL, Ops);
-  }
+  case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
+    return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
+                       DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
+                                   N->getOperand(1)));
   case Intrinsic::loongarch_lsx_vreplve_b:
   case Intrinsic::loongarch_lsx_vreplve_h:
   case Intrinsic::loongarch_lsx_vreplve_w:
@@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
     NODE_NAME_CASE(VILVH)
     NODE_NAME_CASE(VSHUF4I)
     NODE_NAME_CASE(VREPLVEI)
+    NODE_NAME_CASE(VREPLGR2VR)
     NODE_NAME_CASE(XVPERMI)
     NODE_NAME_CASE(VPICK_SEXT_ELT)
     NODE_NAME_CASE(VPICK_ZEXT_ELT)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index df6a55a2b83190..c10acc043c5006 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -129,6 +129,7 @@ enum NodeType : unsigned {
   VILVH,
   VSHUF4I,
   VREPLVEI,
+  VREPLGR2VR,
   XVPERMI,
 
   // Extended vector element extraction
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 3e39e2c10a617a..49ae440073f2e0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
 def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
 def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
 
+def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>;
+def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>;
+def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>;
+def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>;
+
 // XVREPLVE_{B/H/W/D}
 def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
           (XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 25e70b4e6b35ae..250896cbbe5f7a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                      SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
 def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
                                         SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
 def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
 def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
 
@@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
 
 def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
 def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
+def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
+
 def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
 def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
 
@@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
 def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
 def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
 
+def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v16i8 (VREPLGR2VR_B GRLenVT:$rj))>;
+def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v8i16 (VREPLGR2VR_H GRLenVT:$rj))>;
+def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v4i32 (VREPLGR2VR_W GRLenVT:$rj))>;
+def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
+          (v2i64 (VREPLGR2VR_D GRLenVT:$rj))>;
+
 // VREPLVE_{B/H/W/D}
 def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
           (VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
index b3dcd373b60e08..2e538ed66b250e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
@@ -4,14 +4,8 @@
 define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
 ; CHECK-LABEL: xvrepl_ins_w:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.w $xr0, $a0
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a1, 1
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 2
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 3
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 4
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 5
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 6
-; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 7
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
@@ -22,10 +16,8 @@ entry:
 define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
 ; CHECK-LABEL: xvrepl_ins_d:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT:    xvreplgr2vr.d $xr0, $a0
 ; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a1, 1
-; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 2
-; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 3
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
index 3eb06149010402..aee74929468299 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
@@ -4,22 +4,8 @@
 define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
 ; CHECK-LABEL: vrepl_ins_b:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.b $vr0, $a0
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 2
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 3
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 4
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 5
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 6
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 7
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 8
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 9
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 10
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 11
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 12
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 13
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 14
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 15
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
@@ -30,14 +16,8 @@ entry:
 define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
 ; CHECK-LABEL: vrepl_ins_h:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.h $vr0, $a0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 2
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 3
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 4
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 5
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 6
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 7
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
@@ -48,10 +28,8 @@ entry:
 define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
 ; CHECK-LABEL: vrepl_ins_w:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.w $vr0, $a0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a1, 1
-; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 2
-; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 3
 ; CHECK-NEXT:    ret
 entry:
   %0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
@@ -62,7 +40,7 @@ entry:
 define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
 ; CHECK-LABEL: vrepl_ins_d:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vreplgr2vr.d $vr0, $a0
 ; CHECK-NEXT:    vinsgr2vr.d $vr0, $a1, 1
 ; CHECK-NEXT:    ret
 entry:

``````````

</details>


https://github.com/llvm/llvm-project/pull/115803


More information about the llvm-commits mailing list