[llvm] [LoongArch] Optimize vreplgr2vr + vinsgr2vr intrinsic sequence (PR #115803)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 11 18:42:45 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: ZhaoQi (zhaoqi5)
<details>
<summary>Changes</summary>
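Inspired by https://github.com/llvm/llvm-project/issues/101624. This patch lowers the `[x]vreplgr2vr` intrinsics to a dedicated `LoongArchISD::VREPLGR2VR` node instead of a build vector, so that a replicate followed by a `[x]vinsgr2vr` is emitted as one `[x]vreplgr2vr` plus one insert rather than an element-by-element insert sequence.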
---
Full diff: https://github.com/llvm/llvm-project/pull/115803.diff
6 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+5-5)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1)
- (modified) llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td (+9)
- (modified) llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td (+12)
- (modified) llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll (+2-10)
- (modified) llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll (+4-26)
``````````diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index afc016c84162e7..4d77912b9ed54f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -4229,11 +4229,10 @@ performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
- case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
- EVT ResTy = N->getValueType(0);
- SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
- return DAG.getBuildVector(ResTy, DL, Ops);
- }
+ case Intrinsic::loongarch_lasx_xvreplgr2vr_d:
+ return DAG.getNode(LoongArchISD::VREPLGR2VR, DL, N->getValueType(0),
+ DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
+ N->getOperand(1)));
case Intrinsic::loongarch_lsx_vreplve_b:
case Intrinsic::loongarch_lsx_vreplve_h:
case Intrinsic::loongarch_lsx_vreplve_w:
@@ -4710,6 +4709,7 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VILVH)
NODE_NAME_CASE(VSHUF4I)
NODE_NAME_CASE(VREPLVEI)
+ NODE_NAME_CASE(VREPLGR2VR)
NODE_NAME_CASE(XVPERMI)
NODE_NAME_CASE(VPICK_SEXT_ELT)
NODE_NAME_CASE(VPICK_ZEXT_ELT)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index df6a55a2b83190..c10acc043c5006 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -129,6 +129,7 @@ enum NodeType : unsigned {
VILVH,
VSHUF4I,
VREPLVEI,
+ VREPLGR2VR,
XVPERMI,
// Extended vector element extraction
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 3e39e2c10a617a..49ae440073f2e0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1574,6 +1574,15 @@ def : Pat<(lasxsplati16 GPR:$rj), (XVREPLGR2VR_H GPR:$rj)>;
def : Pat<(lasxsplati32 GPR:$rj), (XVREPLGR2VR_W GPR:$rj)>;
def : Pat<(lasxsplati64 GPR:$rj), (XVREPLGR2VR_D GPR:$rj)>;
+def : Pat<(v32i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v32i8 (XVREPLGR2VR_B GRLenVT:$rj))>;
+def : Pat<(v16i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v16i16 (XVREPLGR2VR_H GRLenVT:$rj))>;
+def : Pat<(v8i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v8i32 (XVREPLGR2VR_W GRLenVT:$rj))>;
+def : Pat<(v4i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v4i64 (XVREPLGR2VR_D GRLenVT:$rj))>;
+
// XVREPLVE_{B/H/W/D}
def : Pat<(loongarch_vreplve v32i8:$xj, GRLenVT:$rk),
(XVREPLVE_B v32i8:$xj, GRLenVT:$rk)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 25e70b4e6b35ae..250896cbbe5f7a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -23,6 +23,7 @@ def SDT_LoongArchV2R : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>]>;
def SDT_LoongArchV1RUimm: SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0,1>, SDTCisVT<2, i64>]>;
+def SDT_LoongArchVreplgr2vr : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<0>, SDTCisInt<1>]>;
def SDT_LoongArchVFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
def SDT_LoongArchVFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
@@ -52,6 +53,8 @@ def loongarch_vilvh: SDNode<"LoongArchISD::VILVH", SDT_LoongArchV2R>;
def loongarch_vshuf4i: SDNode<"LoongArchISD::VSHUF4I", SDT_LoongArchV1RUimm>;
def loongarch_vreplvei: SDNode<"LoongArchISD::VREPLVEI", SDT_LoongArchV1RUimm>;
+def loongarch_vreplgr2vr: SDNode<"LoongArchISD::VREPLGR2VR", SDT_LoongArchVreplgr2vr>;
+
def loongarch_vfrecipe: SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchVFRECIPE>;
def loongarch_vfrsqrte: SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchVFRSQRTE>;
@@ -1737,6 +1740,15 @@ def : Pat<(lsxsplati16 GPR:$rj), (VREPLGR2VR_H GPR:$rj)>;
def : Pat<(lsxsplati32 GPR:$rj), (VREPLGR2VR_W GPR:$rj)>;
def : Pat<(lsxsplati64 GPR:$rj), (VREPLGR2VR_D GPR:$rj)>;
+def : Pat<(v16i8 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v16i8 (VREPLGR2VR_B GRLenVT:$rj))>;
+def : Pat<(v8i16 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v8i16 (VREPLGR2VR_H GRLenVT:$rj))>;
+def : Pat<(v4i32 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v4i32 (VREPLGR2VR_W GRLenVT:$rj))>;
+def : Pat<(v2i64 (loongarch_vreplgr2vr GRLenVT:$rj)),
+ (v2i64 (VREPLGR2VR_D GRLenVT:$rj))>;
+
// VREPLVE_{B/H/W/D}
def : Pat<(loongarch_vreplve v16i8:$vj, GRLenVT:$rk),
(VREPLVE_B v16i8:$vj, GRLenVT:$rk)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
index b3dcd373b60e08..2e538ed66b250e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/intrinsic-repl-ins-gr2vr.ll
@@ -4,14 +4,8 @@
define <8 x i32> @xvrepl_ins_w(i32 %a, i32 %b) {
; CHECK-LABEL: xvrepl_ins_w:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.w $xr0, $a0
; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 1
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 2
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 3
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 4
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 5
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 6
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 7
; CHECK-NEXT: ret
entry:
%0 = call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 %a)
@@ -22,10 +16,8 @@ entry:
define <4 x i64> @xvrepl_ins_d(i64 %a, i64 %b) {
; CHECK-LABEL: xvrepl_ins_d:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 0
+; CHECK-NEXT: xvreplgr2vr.d $xr0, $a0
; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 1
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 2
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 3
; CHECK-NEXT: ret
entry:
%0 = call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 %a)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
index 3eb06149010402..aee74929468299 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/intrinsic-repl-ins-gr2vr.ll
@@ -4,22 +4,8 @@
define <16 x i8> @vrepl_ins_b(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_b:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.b $vr0, $a0
; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 2
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 3
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 4
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 5
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 6
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 7
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 8
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 9
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 10
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 11
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 12
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 13
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 14
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 15
; CHECK-NEXT: ret
entry:
%0 = call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 %a)
@@ -30,14 +16,8 @@ entry:
define <8 x i16> @vrepl_ins_h(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_h:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.h $vr0, $a0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 2
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 3
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 4
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 5
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 6
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 7
; CHECK-NEXT: ret
entry:
%0 = call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 %a)
@@ -48,10 +28,8 @@ entry:
define <4 x i32> @vrepl_ins_w(i32 %a, i32 %b) {
; CHECK-LABEL: vrepl_ins_w:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.w $vr0, $a0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 1
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 2
-; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 3
; CHECK-NEXT: ret
entry:
%0 = call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 %a)
@@ -62,7 +40,7 @@ entry:
define <2 x i64> @vrepl_ins_d(i64 %a, i64 %b) {
; CHECK-LABEL: vrepl_ins_d:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT: vreplgr2vr.d $vr0, $a0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
; CHECK-NEXT: ret
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/115803
More information about the llvm-commits mailing list