[llvm] [LoongArch] Optimize inserting element to high part of 256-bit vector (PR #146816)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 2 20:57:55 PDT 2025
https://github.com/zhaoqi5 created https://github.com/llvm/llvm-project/pull/146816
(No description was provided for this pull request.)
>From fb473a4aa0f13a53259258539a20e56885f032fe Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Thu, 3 Jul 2025 11:49:55 +0800
Subject: [PATCH] [LoongArch] Optimize inserting element to high part of
256bits vector
---
.../LoongArch/LoongArchISelLowering.cpp | 5 +-
.../CodeGen/LoongArch/lasx/build-vector.ll | 154 ++++++++----------
.../insert-extract-pair-elements.ll | 22 ++-
.../lasx/ir-instruction/insertelement.ll | 6 +-
4 files changed, 79 insertions(+), 108 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 7dae4d30d31be..9ee58fb7f1771 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -5934,10 +5934,9 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
Register ScratchReg1 = XSrc;
if (Idx >= HalfSize) {
ScratchReg1 = MRI.createVirtualRegister(RC);
- BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
+ BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_D), ScratchReg1)
.addReg(XSrc)
- .addReg(XSrc)
- .addImm(1);
+ .addImm(14);
}
Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index b06f6523e977c..f25e988b52dc9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -250,84 +250,68 @@ define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 14
; CHECK-NEXT: ld.b $a1, $sp, 72
; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 15
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: ld.b $a2, $sp, 80
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0
-; CHECK-NEXT: ld.b $a1, $sp, 80
-; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
; CHECK-NEXT: ld.b $a1, $sp, 88
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
-; CHECK-NEXT: ld.b $a1, $sp, 96
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 1
+; CHECK-NEXT: ld.b $a2, $sp, 96
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
; CHECK-NEXT: ld.b $a1, $sp, 104
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
-; CHECK-NEXT: ld.b $a1, $sp, 112
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 3
+; CHECK-NEXT: ld.b $a2, $sp, 112
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
; CHECK-NEXT: ld.b $a1, $sp, 120
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
-; CHECK-NEXT: ld.b $a1, $sp, 128
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 5
+; CHECK-NEXT: ld.b $a2, $sp, 128
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
; CHECK-NEXT: ld.b $a1, $sp, 136
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
-; CHECK-NEXT: ld.b $a1, $sp, 144
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 7
+; CHECK-NEXT: ld.b $a2, $sp, 144
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
; CHECK-NEXT: ld.b $a1, $sp, 152
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
-; CHECK-NEXT: ld.b $a1, $sp, 160
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 9
+; CHECK-NEXT: ld.b $a2, $sp, 160
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
; CHECK-NEXT: ld.b $a1, $sp, 168
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
-; CHECK-NEXT: ld.b $a1, $sp, 176
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 11
+; CHECK-NEXT: ld.b $a2, $sp, 176
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
; CHECK-NEXT: ld.b $a1, $sp, 184
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 13
+; CHECK-NEXT: ld.b $a2, $sp, 192
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14
-; CHECK-NEXT: ld.b $a1, $sp, 192
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 15
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
@@ -371,8 +355,15 @@ entry:
define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
; CHECK-LABEL: buildvector_v16i16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: ld.h $t0, $sp, 8
-; CHECK-NEXT: ld.h $t1, $sp, 0
+; CHECK-NEXT: ld.h $t0, $sp, 64
+; CHECK-NEXT: ld.h $t1, $sp, 56
+; CHECK-NEXT: ld.h $t2, $sp, 48
+; CHECK-NEXT: ld.h $t3, $sp, 40
+; CHECK-NEXT: ld.h $t4, $sp, 32
+; CHECK-NEXT: ld.h $t5, $sp, 24
+; CHECK-NEXT: ld.h $t6, $sp, 16
+; CHECK-NEXT: ld.h $t7, $sp, 8
+; CHECK-NEXT: ld.h $t8, $sp, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 1
; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 2
@@ -380,45 +371,30 @@ define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i1
; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 4
; CHECK-NEXT: vinsgr2vr.h $vr0, $a6, 5
; CHECK-NEXT: vinsgr2vr.h $vr0, $a7, 6
-; CHECK-NEXT: vinsgr2vr.h $vr0, $t1, 7
-; CHECK-NEXT: ld.h $a1, $sp, 16
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $t0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $t8, 7
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t7, 0
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.h $a2, $sp, 24
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t6, 1
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.h $a1, $sp, 32
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t5, 2
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.h $a2, $sp, 40
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t4, 3
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.h $a1, $sp, 48
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 4
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t3, 4
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.h $a2, $sp, 56
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t2, 5
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: ld.h $a1, $sp, 64
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 6
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t1, 6
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t0, 7
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
index 88c3e4367ffa7..a94708e96e896 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insert-extract-pair-elements.ll
@@ -9,13 +9,12 @@ define <32 x i8> @insert_extract_v32i8(<32 x i8> %a) nounwind {
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: vpickve2gr.b $a0, $vr0, 15
; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: ld.b $a1, $sp, 31
-; CHECK-NEXT: vinsgr2vr.b $vr0, $a0, 1
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
+; CHECK-NEXT: ld.b $a0, $sp, 31
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 1
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a0, 1
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
@@ -38,13 +37,12 @@ define <16 x i16> @insert_extract_v16i16(<16 x i16> %a) nounwind {
; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
; CHECK-NEXT: addi.d $fp, $sp, 64
; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: vpickve2gr.h $a0, $vr0, 7
; CHECK-NEXT: xvst $xr0, $sp, 0
-; CHECK-NEXT: ld.h $a1, $sp, 30
-; CHECK-NEXT: vinsgr2vr.h $vr0, $a0, 1
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT: ld.h $a0, $sp, 30
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 1
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 1
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: addi.d $sp, $fp, -64
; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
index 25106b456d2f7..3a4f6efd2c893 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -18,8 +18,7 @@ define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
; CHECK-LABEL: insert_32xi8_upper:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
; CHECK-NEXT: vinsgr2vr.b $vr1, $a2, 0
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: xvst $xr0, $a1, 0
@@ -47,8 +46,7 @@ define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
; CHECK-LABEL: insert_16xi16_upper:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvori.b $xr1, $xr0, 0
-; CHECK-NEXT: xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
; CHECK-NEXT: vinsgr2vr.h $vr1, $a2, 0
; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
; CHECK-NEXT: xvst $xr0, $a1, 0
More information about the llvm-commits
mailing list