[llvm] [LoongArch] Add patterns for vstelm instructions (PR #139201)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 8 21:56:05 PDT 2025
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/139201
>From 95f8fdd66bc45fc6642fa23a7067f19d40660453 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 9 May 2025 11:35:53 +0800
Subject: [PATCH 1/2] add patterns for vector_extract & store merged into
vstelm instructions
---
.../LoongArch/LoongArchLASXInstrInfo.td | 8 +++++++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 21 +++++++++++++++++++
2 files changed, 29 insertions(+)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index fe08c1050b4d7..802fd082564e1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1756,6 +1756,14 @@ def : Pat<(lasxsplatf32 FPR32:$fj),
def : Pat<(lasxsplatf64 FPR64:$fj),
(XVREPLVE0_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64))>;
+// VSTELM
+defm : VstelmPat<truncstorei8, v32i8, XVSTELM_B, simm12_addlike, uimm5>;
+defm : VstelmPat<truncstorei16, v16i16, XVSTELM_H, simm11_lsl1, uimm4>;
+defm : VstelmPat<truncstorei32, v8i32, XVSTELM_W, simm10_lsl2, uimm3>;
+defm : VstelmPat<store, v4i64, XVSTELM_D, simm9_lsl3, uimm2>;
+defm : VstelmPat<store, v8f32, XVSTELM_W, simm10_lsl2, uimm3, f32>;
+defm : VstelmPat<store, v4f64, XVSTELM_D, simm9_lsl3, uimm2, f64>;
+
// Loads/Stores
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in {
defm : LdPat<load, XVLD, vt>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 1ffc5f8056b96..69fbf5ae45603 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1446,6 +1446,20 @@ multiclass VldreplPat<ValueType vt, LAInst Inst, Operand ImmOpnd> {
(Inst BaseAddr:$rj, ImmOpnd:$imm)>;
}
+multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
+ Operand ImmOpnd, Operand IdxOpnd, ValueType elt = i64> { // Select Inst for StoreOp of one element extracted from a vt vector; elt is the extracted scalar type.
+ def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)), BaseAddr:$rj),
+ (Inst vt:$vd, BaseAddr:$rj, 0, IdxOpnd:$idx)>; // Bare base address: the offset operand is the literal 0.
+ // Address matched by AddrConstant: GPR base plus an ImmOpnd immediate.
+ def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)),
+ (AddrConstant GPR:$rj, ImmOpnd:$imm)),
+ (Inst vt:$vd, GPR:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
+ // Address matched by AddLike: BaseAddr combined with an ImmOpnd immediate.
+ def : Pat<(StoreOp(elt(vector_extract vt:$vd, IdxOpnd:$idx)),
+ (AddLike BaseAddr:$rj, ImmOpnd:$imm)),
+ (Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
+}
+
let Predicates = [HasExtLSX] in {
// VADD_{B/H/W/D}
@@ -1935,6 +1949,13 @@ def : Pat<(lsxsplatf32 FPR32:$fj),
def : Pat<(lsxsplatf64 FPR64:$fj),
(VREPLVEI_D (SUBREG_TO_REG (i64 0), FPR64:$fj, sub_64), 0)>;
+defm : VstelmPat<truncstorei8, v16i8, VSTELM_B, simm12_addlike, uimm4>;
+defm : VstelmPat<truncstorei16, v8i16, VSTELM_H, simm11_lsl1, uimm3>;
+defm : VstelmPat<truncstorei32, v4i32, VSTELM_W, simm10_lsl2, uimm2>;
+defm : VstelmPat<store, v2i64, VSTELM_D, simm9_lsl3, uimm1>;
+defm : VstelmPat<store, v4f32, VSTELM_W, simm10_lsl2, uimm2, f32>;
+defm : VstelmPat<store, v2f64, VSTELM_D, simm9_lsl3, uimm1, f64>;
+
// Loads/Stores
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64] in {
defm : LdPat<load, VLD, vt>;
>From 6ff280de04c16a11be17dc51b5c9c2776d4087b0 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 9 May 2025 12:47:52 +0800
Subject: [PATCH 2/2] update tests
---
.../lasx/ir-instruction/extractelement.ll | 20 ++++---------
.../lsx/ir-instruction/extractelement.ll | 18 ++++-------
llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll | 30 +++++++------------
llvm/test/CodeGen/LoongArch/vector-fp-imm.ll | 3 +-
4 files changed, 23 insertions(+), 48 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
index fc2929d8e6db3..e05a236b49c20 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/extractelement.ll
@@ -5,8 +5,7 @@ define void @extract_32xi8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_32xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: xvstelm.b $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <32 x i8>, ptr %src
%e = extractelement <32 x i8> %v, i32 1
@@ -18,8 +17,7 @@ define void @extract_16xi16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_16xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: xvstelm.h $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <16 x i16>, ptr %src
%e = extractelement <16 x i16> %v, i32 1
@@ -31,8 +29,7 @@ define void @extract_8xi32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 1
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <8 x i32>, ptr %src
%e = extractelement <8 x i32> %v, i32 1
@@ -44,8 +41,7 @@ define void @extract_4xi64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 1
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <4 x i64>, ptr %src
%e = extractelement <4 x i64> %v, i32 1
@@ -57,9 +53,7 @@ define void @extract_8xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT: movgr2fr.w $fa0, $a0
-; CHECK-NEXT: fst.s $fa0, $a1, 0
+; CHECK-NEXT: xvstelm.w $xr0, $a1, 0, 7
; CHECK-NEXT: ret
%v = load volatile <8 x float>, ptr %src
%e = extractelement <8 x float> %v, i32 7
@@ -71,9 +65,7 @@ define void @extract_4xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: movgr2fr.d $fa0, $a0
-; CHECK-NEXT: fst.d $fa0, $a1, 0
+; CHECK-NEXT: xvstelm.d $xr0, $a1, 0, 3
; CHECK-NEXT: ret
%v = load volatile <4 x double>, ptr %src
%e = extractelement <4 x double> %v, i32 3
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
index f3b8e02ac28f7..a837fe17a0120 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/extractelement.ll
@@ -5,8 +5,7 @@ define void @extract_16xi8(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_16xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 1
-; CHECK-NEXT: st.b $a0, $a1, 0
+; CHECK-NEXT: vstelm.b $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <16 x i8>, ptr %src
%e = extractelement <16 x i8> %v, i32 1
@@ -18,8 +17,7 @@ define void @extract_8xi16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_8xi16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 1
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <8 x i16>, ptr %src
%e = extractelement <8 x i16> %v, i32 1
@@ -31,8 +29,7 @@ define void @extract_4xi32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <4 x i32>, ptr %src
%e = extractelement <4 x i32> %v, i32 1
@@ -44,8 +41,7 @@ define void @extract_2xi64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_2xi64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <2 x i64>, ptr %src
%e = extractelement <2 x i64> %v, i32 1
@@ -57,8 +53,7 @@ define void @extract_4xfloat(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_4xfloat:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vreplvei.w $vr0, $vr0, 1
-; CHECK-NEXT: fst.s $fa0, $a1, 0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <4 x float>, ptr %src
%e = extractelement <4 x float> %v, i32 1
@@ -70,8 +65,7 @@ define void @extract_2xdouble(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: extract_2xdouble:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vreplvei.d $vr0, $vr0, 1
-; CHECK-NEXT: fst.d $fa0, $a1, 0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 1
; CHECK-NEXT: ret
%v = load volatile <2 x double>, ptr %src
%e = extractelement <2 x double> %v, i32 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
index 480c76574715d..e056e7c38ddcd 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-trunc.ll
@@ -6,8 +6,7 @@ define void @load_trunc_2i64_to_2i32(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.w $vr0, $vr0, 8
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i32>
@@ -22,8 +21,7 @@ define void @load_trunc_2i64_to_2i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI1_0)
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI1_0)
; CHECK-NEXT: vshuf.h $vr1, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr1, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: vstelm.w $vr1, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i16>
@@ -38,8 +36,7 @@ define void @load_trunc_2i64_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI2_0)
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI2_0)
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ptr
%trunc = trunc <2 x i64> %a to <2 x i8>
@@ -52,8 +49,7 @@ define void @load_trunc_4i32_to_4i16(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickev.h $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i16>
@@ -68,8 +64,7 @@ define void @load_trunc_4i32_to_4i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(.LCPI4_0)
; CHECK-NEXT: vld $vr1, $a0, %pc_lo12(.LCPI4_0)
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr0, $vr1
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ptr
%trunc = trunc <4 x i32> %a to <4 x i8>
@@ -82,8 +77,7 @@ define void @load_trunc_8i16_to_8i8(ptr %ptr, ptr %dst) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.d $a0, $vr0, 0
-; CHECK-NEXT: st.d $a0, $a1, 0
+; CHECK-NEXT: vstelm.d $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %ptr
%trunc = trunc <8 x i16> %a to <8 x i8>
@@ -97,8 +91,7 @@ define void @load_trunc_2i32_to_2i16(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.h $vr0, $vr0, 8
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i16>
@@ -114,8 +107,7 @@ define void @load_trunc_2i32_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: vld $vr0, $a2, %pc_lo12(.LCPI7_0)
; CHECK-NEXT: vinsgr2vr.d $vr1, $a0, 0
; CHECK-NEXT: vshuf.b $vr0, $vr0, $vr1, $vr0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i32>, ptr %ptr
%trunc = trunc <2 x i32> %a to <2 x i8>
@@ -129,8 +121,7 @@ define void @load_trunc_4i16_to_4i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: ld.d $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.d $vr0, $a0, 0
; CHECK-NEXT: vpickev.b $vr0, $vr0, $vr0
-; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
-; CHECK-NEXT: st.w $a0, $a1, 0
+; CHECK-NEXT: vstelm.w $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %ptr
%trunc = trunc <4 x i16> %a to <4 x i8>
@@ -144,8 +135,7 @@ define void @load_trunc_2i16_to_2i8(ptr %ptr, ptr %dst) nounwind {
; CHECK-NEXT: ld.w $a0, $a0, 0
; CHECK-NEXT: vinsgr2vr.w $vr0, $a0, 0
; CHECK-NEXT: vshuf4i.b $vr0, $vr0, 8
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 0
-; CHECK-NEXT: st.h $a0, $a1, 0
+; CHECK-NEXT: vstelm.h $vr0, $a1, 0, 0
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %ptr
%trunc = trunc <2 x i16> %a to <2 x i8>
diff --git a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
index d043eefb96a50..571828e02e56d 100644
--- a/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
+++ b/llvm/test/CodeGen/LoongArch/vector-fp-imm.ll
@@ -132,8 +132,7 @@ define void @test_f2(ptr %P, ptr %S) nounwind {
; LA64D-NEXT: lu52i.d $a0, $a0, 1024
; LA64D-NEXT: vreplgr2vr.d $vr1, $a0
; LA64D-NEXT: vfadd.s $vr0, $vr0, $vr1
-; LA64D-NEXT: vpickve2gr.d $a0, $vr0, 0
-; LA64D-NEXT: st.d $a0, $a1, 0
+; LA64D-NEXT: vstelm.d $vr0, $a1, 0, 0
; LA64D-NEXT: ret
%p = load %f2, ptr %P
%R = fadd %f2 %p, < float 1.000000e+00, float 2.000000e+00 >
More information about the llvm-commits
mailing list