[llvm] [LoongArch] Optimize inserting extracted element for v4i64/v8i32 (PR #152629)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 23:20:40 PDT 2025
https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/152629
>From 7132dbf696a6df1526ec2830fda69f07e6fd0dc9 Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 8 Aug 2025 10:34:59 +0800
Subject: [PATCH 1/3] Optimize inserting extracted element for v4i64/v8i32
---
.../Target/LoongArch/LoongArchLASXInstrInfo.td | 16 ++++++++++++++++
.../ir-instruction/insert-extract-element.ll | 8 ++++----
2 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d8bb16fe9b94d..37d665010aa8c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1640,6 +1640,22 @@ defm : PairInsertExtractPatV8<v8f32, f32>;
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
defm : PairInsertExtractPatV4<v4f64, f64>;
+foreach imm1 = 0...7 in {
+ foreach imm2 = 0...7 in {
+ def : Pat<(vector_insert v8i32:$xd,
+ (GRLenVT(vector_extract v8i32:$xj, imm1)), imm2),
+ (XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, imm1), imm2)>;
+ }
+}
+
+foreach imm1 = 0...3 in {
+ foreach imm2 = 0...3 in {
+ def : Pat<(vector_insert v4i64:$xd,
+ (GRLenVT(vector_extract v4i64:$xj, imm1)), imm2),
+ (XVINSVE0_D v4i64:$xd, (XVPICKVE_D v4i64:$xj, imm1), imm2)>;
+ }
+}
+
// PseudoXVINSGR2VR_{B/H}
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
index ac5a2143451d0..91a1247cab89e 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
@@ -30,8 +30,8 @@ entry:
define <8 x i32> @insert_extract_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: insert_extract_v8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve2gr.w $a0, $xr0, 7
-; CHECK-NEXT: xvinsgr2vr.w $xr0, $a0, 1
+; CHECK-NEXT: xvpickve.w $xr1, $xr0, 7
+; CHECK-NEXT: xvinsve0.w $xr0, $xr1, 1
; CHECK-NEXT: ret
entry:
%b = extractelement <8 x i32> %a, i32 7
@@ -54,8 +54,8 @@ entry:
define <4 x i64> @insert_extract_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: insert_extract_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvpickve2gr.d $a0, $xr0, 3
-; CHECK-NEXT: xvinsgr2vr.d $xr0, $a0, 1
+; CHECK-NEXT: xvpickve.d $xr1, $xr0, 3
+; CHECK-NEXT: xvinsve0.d $xr0, $xr1, 1
; CHECK-NEXT: ret
entry:
%b = extractelement <4 x i64> %a, i32 3
>From b1b65af2d30b0e2ab74fb4ad7804f4e760b29e6c Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 8 Aug 2025 11:48:05 +0800
Subject: [PATCH 2/3] remove unnecessary imm
---
.../LoongArch/LoongArchLASXInstrInfo.td | 24 +++++++------------
1 file changed, 9 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 37d665010aa8c..1fcbbd62edbc5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1640,21 +1640,15 @@ defm : PairInsertExtractPatV8<v8f32, f32>;
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
defm : PairInsertExtractPatV4<v4f64, f64>;
-foreach imm1 = 0...7 in {
- foreach imm2 = 0...7 in {
- def : Pat<(vector_insert v8i32:$xd,
- (GRLenVT(vector_extract v8i32:$xj, imm1)), imm2),
- (XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, imm1), imm2)>;
- }
-}
-
-foreach imm1 = 0...3 in {
- foreach imm2 = 0...3 in {
- def : Pat<(vector_insert v4i64:$xd,
- (GRLenVT(vector_extract v4i64:$xj, imm1)), imm2),
- (XVINSVE0_D v4i64:$xd, (XVPICKVE_D v4i64:$xj, imm1), imm2)>;
- }
-}
+def : Pat<(vector_insert v8i32:$xd,
+ (GRLenVT(vector_extract v8i32:$xj, uimm3:$imm1)), uimm3:$imm2),
+ (XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, uimm3:$imm1),
+ uimm3:$imm2)>;
+
+def : Pat<(vector_insert v4i64:$xd,
+ (GRLenVT(vector_extract v4i64:$xj, uimm2:$imm1)), uimm2:$imm2),
+ (XVINSVE0_D v4i64:$xd, (XVPICKVE_D v4i64:$xj, uimm2:$imm1),
+ uimm2:$imm2)>;
// PseudoXVINSGR2VR_{B/H}
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
>From 44564dd1cd6ea3b9f05a920384f6ea5f9170f65a Mon Sep 17 00:00:00 2001
From: tangaac <tangyan01 at loongson.cn>
Date: Fri, 8 Aug 2025 14:08:17 +0800
Subject: [PATCH 3/3] minor optimization when extracting from index 0
---
.../LoongArch/LoongArchLASXInstrInfo.td | 12 ++++++++++
.../ir-instruction/insert-extract-element.ll | 23 +++++++++++++++++++
2 files changed, 35 insertions(+)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 1fcbbd62edbc5..e1c717510bbfe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1640,6 +1640,18 @@ defm : PairInsertExtractPatV8<v8f32, f32>;
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
defm : PairInsertExtractPatV4<v4f64, f64>;
+foreach imm = 0...7 in {
+ def : Pat<(vector_insert v8i32:$xd, (GRLenVT(vector_extract v8i32:$xj, 0)),
+ uimm3:$imm),
+ (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
+}
+
+foreach imm = 0...3 in {
+ def : Pat<(vector_insert v4i64:$xd, (GRLenVT(vector_extract v4i64:$xj, 0)),
+ uimm2:$imm),
+ (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
+}
+
def : Pat<(vector_insert v8i32:$xd,
(GRLenVT(vector_extract v8i32:$xj, uimm3:$imm1)), uimm3:$imm2),
(XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, uimm3:$imm1),
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
index 91a1247cab89e..c074bfecf95b0 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-element.ll
@@ -39,6 +39,18 @@ entry:
ret <8 x i32> %c
}
+
+define <8 x i32> @insert_extract0_v8i32(<8 x i32> %a) nounwind {
+; CHECK-LABEL: insert_extract0_v8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsve0.w $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %b = extractelement <8 x i32> %a, i32 0
+ %c = insertelement <8 x i32> %a, i32 %b, i32 1
+ ret <8 x i32> %c
+}
+
define <8 x float> @insert_extract_v8f32(<8 x float> %a) nounwind {
; CHECK-LABEL: insert_extract_v8f32:
; CHECK: # %bb.0: # %entry
@@ -63,6 +75,17 @@ entry:
ret <4 x i64> %c
}
+define <4 x i64> @insert_extract0_v4i64(<4 x i64> %a) nounwind {
+; CHECK-LABEL: insert_extract0_v4i64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: xvinsve0.d $xr0, $xr0, 1
+; CHECK-NEXT: ret
+entry:
+ %b = extractelement <4 x i64> %a, i32 0
+ %c = insertelement <4 x i64> %a, i64 %b, i32 1
+ ret <4 x i64> %c
+}
+
define <4 x double> @insert_extract_v4f64(<4 x double> %a) nounwind {
; CHECK-LABEL: insert_extract_v4f64:
; CHECK: # %bb.0: # %entry
More information about the llvm-commits mailing list