[llvm] [LoongArch] Optimize inserting extracted fp elements (PR #146018)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 30 20:12:11 PDT 2025


https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/146018

>From 6664ef06be7d38bb5d7d49d4be198ebf8500a746 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Fri, 27 Jun 2025 11:22:37 +0800
Subject: [PATCH 1/2] [LoongArch] Optimize inserting extracted elements

---
 llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 13 ++++++++-----
 llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td  |  5 ++++-
 .../LoongArch/lasx/ir-instruction/fix-xvshuf.ll     | 12 ++----------
 .../lasx/ir-instruction/insert-extract-element.ll   |  4 ----
 .../lsx/ir-instruction/insert-extract-element.ll    |  6 ++----
 5 files changed, 16 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index ff7b0f2ae3f25..915dc803bdbd7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1593,11 +1593,14 @@ def : Pat<(vector_insert v8i32:$xd, GRLenVT:$rj, uimm3:$imm),
           (XVINSGR2VR_W v8i32:$xd, GRLenVT:$rj, uimm3:$imm)>;
 def : Pat<(vector_insert v4i64:$xd, GRLenVT:$rj, uimm2:$imm),
           (XVINSGR2VR_D v4i64:$xd, GRLenVT:$rj, uimm2:$imm)>;
-
-def : Pat<(vector_insert v8f32:$vd, FPR32:$fj, uimm3:$imm),
-          (XVINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
-def : Pat<(vector_insert v4f64:$vd, FPR64:$fj, uimm2:$imm),
-          (XVINSGR2VR_D $vd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
+def : Pat<(vector_insert v8f32:$xd, (f32 (vector_extract v8f32:$xj, uimm3:$imm1)), uimm3:$imm2),
+          (XVINSGR2VR_W $xd, (XVPICKVE2GR_W v8f32:$xj, uimm3:$imm1), uimm3:$imm2)>;
+def : Pat<(vector_insert v4f64:$xd, (f64 (vector_extract v4f64:$xj, uimm2:$imm1)), uimm2:$imm2),
+          (XVINSGR2VR_D $xd, (XVPICKVE2GR_D v4f64:$xj, uimm2:$imm1), uimm2:$imm2)>;
+def : Pat<(vector_insert v8f32:$xd, FPR32:$fj, uimm3:$imm),
+          (XVINSGR2VR_W $xd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm3:$imm)>;
+def : Pat<(vector_insert v4f64:$xd, FPR64:$fj, uimm2:$imm),
+          (XVINSGR2VR_D $xd, (COPY_TO_REGCLASS FPR64:$fj, GPR), uimm2:$imm)>;
 
 // scalar_to_vector
 def : Pat<(v8f32 (scalar_to_vector FPR32:$fj)),
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index d73d78083ddcd..34c6ffc6727f1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1791,7 +1791,10 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
           (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
 def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
           (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
-
+def : Pat<(vector_insert v4f32:$vd, (f32 (vector_extract v4f32:$vj, uimm2:$imm1)), uimm2:$imm2),
+          (VINSGR2VR_W $vd, (VPICKVE2GR_W v4f32:$vj, uimm2:$imm1), uimm2:$imm2)>;
+def : Pat<(vector_insert v2f64:$vd, (f64 (vector_extract v2f64:$vj, uimm1:$imm1)), uimm1:$imm2),
+          (VINSGR2VR_D $vd, (VPICKVE2GR_D v2f64:$vj, uimm1:$imm1), uimm1:$imm2)>;
 def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
           (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
 def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
index f3bec11810e9b..f154dd3b8eb3c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/fix-xvshuf.ll
@@ -7,20 +7,12 @@ define <4 x double> @shufflevector_v4f64(<4 x double> %a, <4 x double> %b) {
 ; CHECK-LABEL: shufflevector_v4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
-; CHECK-NEXT:    movgr2fr.d $fa2, $a0
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr1, 2
-; CHECK-NEXT:    movgr2fr.d $fa3, $a0
-; CHECK-NEXT:    movfr2gr.d $a0, $fa2
 ; CHECK-NEXT:    xvinsgr2vr.d $xr2, $a0, 0
-; CHECK-NEXT:    movfr2gr.d $a0, $fa3
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr1, 2
 ; CHECK-NEXT:    xvinsgr2vr.d $xr2, $a0, 1
 ; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT:    movgr2fr.d $fa0, $a0
-; CHECK-NEXT:    xvpickve2gr.d $a0, $xr1, 3
-; CHECK-NEXT:    movgr2fr.d $fa1, $a0
-; CHECK-NEXT:    movfr2gr.d $a0, $fa0
 ; CHECK-NEXT:    xvinsgr2vr.d $xr2, $a0, 2
-; CHECK-NEXT:    movfr2gr.d $a0, $fa1
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr1, 3
 ; CHECK-NEXT:    xvinsgr2vr.d $xr2, $a0, 3
 ; CHECK-NEXT:    xvori.b $xr0, $xr2, 0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
index 3fdc439e68679..fb942defe446a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
@@ -61,8 +61,6 @@ define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT:    movgr2fr.w $fa1, $a0
-; CHECK-NEXT:    movfr2gr.s $a0, $fa1
 ; CHECK-NEXT:    xvinsgr2vr.w $xr0, $a0, 1
 ; CHECK-NEXT:    ret
 entry:
@@ -87,8 +85,6 @@ define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT:    movgr2fr.d $fa1, $a0
-; CHECK-NEXT:    movfr2gr.d $a0, $fa1
 ; CHECK-NEXT:    xvinsgr2vr.d $xr0, $a0, 1
 ; CHECK-NEXT:    ret
 entry:
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll
index c7dd1454c7e33..605e886253790 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll
@@ -40,8 +40,7 @@ entry:
 define <4 x float> @insert_extract_v4f32(<4 x float> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vreplvei.w $vr1, $vr0, 3
-; CHECK-NEXT:    movfr2gr.s $a0, $fa1
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:
@@ -65,8 +64,7 @@ entry:
 define <2 x double> @insert_extract_v2f64(<2 x double> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vreplvei.d $vr1, $vr0, 1
-; CHECK-NEXT:    movfr2gr.d $a0, $fa1
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
 ; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; CHECK-NEXT:    ret
 entry:

>From aab3fee78f037271bb707eca185bd625f62dee34 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Tue, 1 Jul 2025 11:04:58 +0800
Subject: [PATCH 2/2] use vextrins instruction

---
 .../Target/LoongArch/LoongArchLSXInstrInfo.td | 51 +++++++++++++++++--
 .../ir-instruction/insert-extract-element.ll  | 18 +++----
 2 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 34c6ffc6727f1..9dd6006e3a9dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1482,6 +1482,28 @@ multiclass VstelmPat<PatFrag StoreOp, ValueType vt, LAInst Inst,
             (Inst vt:$vd, BaseAddr:$rj, ImmOpnd:$imm, IdxOpnd:$idx)>;
 }
 
+multiclass InsertExtractPatV4<ValueType vecty, ValueType elemty> {
+  foreach imm1 = 0...3 in {
+    foreach imm2 = 0...3 in {
+      defvar Imm = !or(!shl(imm2, 4), imm1);
+      def : Pat<(vector_insert vecty:$vd,
+                    (elemty (vector_extract vecty:$vj, imm1)), imm2),
+                (VEXTRINS_W $vd, $vj, Imm)>;
+    }
+  }
+}
+
+multiclass InsertExtractPatV2<ValueType vecty, ValueType elemty> {
+  foreach imm1 = 0...1 in {
+    foreach imm2 = 0...1 in {
+      defvar Imm = !or(!shl(imm2, 4), imm1);
+      def : Pat<(vector_insert vecty:$vd,
+                    (elemty (vector_extract vecty:$vj, imm1)), imm2),
+                (VEXTRINS_D $vd, $vj, Imm)>;
+    }
+  }
+}
+
 let Predicates = [HasExtLSX] in {
 
 // VADD_{B/H/W/D}
@@ -1782,6 +1804,31 @@ defm : PatCCVrVrF<SETUNE, "VFCMP_CUNE">;
 defm : PatCCVrVrF<SETO, "VFCMP_COR">;
 defm : PatCCVrVrF<SETUO, "VFCMP_CUN">;
 
+// Insert element extracted from vector into vector.
+// VPICKVE2GR_{B/H/W/D} + VINSGR2VR_{B/H/W/D} -> VEXTRINS_{B/H/W/D}
+foreach imm1 = 0...15 in {
+  foreach imm2 = 0...15 in {
+    defvar Imm = !or(!shl(imm2, 4), imm1);
+    def : Pat<(vector_insert v16i8:$vd,
+                  (GRLenVT (vector_extract v16i8:$vj, imm1)), imm2),
+              (VEXTRINS_B $vd, $vj, Imm)>;
+  }
+}
+
+foreach imm1 = 0...7 in {
+  foreach imm2 = 0...7 in {
+    defvar Imm = !or(!shl(imm2, 4), imm1);
+    def : Pat<(vector_insert v8i16:$vd,
+                  (GRLenVT (vector_extract v8i16:$vj, imm1)), imm2),
+              (VEXTRINS_H $vd, $vj, Imm)>;
+  }
+}
+
+defm : InsertExtractPatV4<v4i32, GRLenVT>;
+defm : InsertExtractPatV4<v4f32, f32>;
+defm : InsertExtractPatV2<v2i64, GRLenVT>;
+defm : InsertExtractPatV2<v2f64, f64>;
+
 // VINSGR2VR_{B/H/W/D}
 def : Pat<(vector_insert v16i8:$vd, GRLenVT:$rj, uimm4:$imm),
           (VINSGR2VR_B v16i8:$vd, GRLenVT:$rj, uimm4:$imm)>;
@@ -1791,10 +1838,6 @@ def : Pat<(vector_insert v4i32:$vd, GRLenVT:$rj, uimm2:$imm),
           (VINSGR2VR_W v4i32:$vd, GRLenVT:$rj, uimm2:$imm)>;
 def : Pat<(vector_insert v2i64:$vd, GRLenVT:$rj, uimm1:$imm),
           (VINSGR2VR_D v2i64:$vd, GRLenVT:$rj, uimm1:$imm)>;
-def : Pat<(vector_insert v4f32:$vd, (f32 (vector_extract v4f32:$vj, uimm2:$imm1)), uimm2:$imm2),
-          (VINSGR2VR_W $vd, (VPICKVE2GR_W v4f32:$vj, uimm2:$imm1), uimm2:$imm2)>;
-def : Pat<(vector_insert v2f64:$vd, (f64 (vector_extract v2f64:$vj, uimm1:$imm1)), uimm1:$imm2),
-          (VINSGR2VR_D $vd, (VPICKVE2GR_D v2f64:$vj, uimm1:$imm1), uimm1:$imm2)>;
 def : Pat<(vector_insert v4f32:$vd, FPR32:$fj, uimm2:$imm),
           (VINSGR2VR_W $vd, (COPY_TO_REGCLASS FPR32:$fj, GPR), uimm2:$imm)>;
 def : Pat<(vector_insert v2f64:$vd, FPR64:$fj, uimm1:$imm),
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll
index 605e886253790..e9a0c8a110452 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/insert-extract-element.ll
@@ -4,8 +4,7 @@
 define <16 x i8> @insert_extract_v16i8(<16 x i8> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v16i8:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
-; CHECK-NEXT:    vinsgr2vr.b $vr0, $a0, 1
+; CHECK-NEXT:    vextrins.b $vr0, $vr0, 31
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <16 x i8> %a, i32 15
@@ -16,8 +15,7 @@ entry:
 define <8 x i16> @insert_extract_v8i16(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v8i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 1
+; CHECK-NEXT:    vextrins.h $vr0, $vr0, 23
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <8 x i16> %a, i32 7
@@ -28,8 +26,7 @@ entry:
 define <4 x i32> @insert_extract_v4i32(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v4i32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
-; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; CHECK-NEXT:    vextrins.w $vr0, $vr0, 19
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x i32> %a, i32 3
@@ -40,8 +37,7 @@ entry:
 define <4 x float> @insert_extract_v4f32(<4 x float> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 3
-; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vextrins.w $vr0, $vr0, 3
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <4 x float> %a, i32 3
@@ -52,8 +48,7 @@ entry:
 define <2 x i64> @insert_extract_v2i64(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v2i64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vextrins.d $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x i64> %a, i32 1
@@ -64,8 +59,7 @@ entry:
 define <2 x double> @insert_extract_v2f64(<2 x double> %a) nounwind {
 ; CHECK-LABEL: insert_extract_v2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 1
-; CHECK-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; CHECK-NEXT:    vextrins.d $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %b = extractelement <2 x double> %a, i32 1



More information about the llvm-commits mailing list