[llvm] efa5063 - [LoongArch] Optimize inserting an element into the high part of a 256-bit vector (#146816)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 17 02:52:16 PDT 2025


Author: ZhaoQi
Date: 2025-07-17T17:52:12+08:00
New Revision: efa5063ba7a7151056439b70901219311c531cec

URL: https://github.com/llvm/llvm-project/commit/efa5063ba7a7151056439b70901219311c531cec
DIFF: https://github.com/llvm/llvm-project/commit/efa5063ba7a7151056439b70901219311c531cec.diff

LOG: [LoongArch] Optimize inserting an element into the high part of a 256-bit vector (#146816)

Added: 
    

Modified: 
    llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
    llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
    llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 39a1d542dd309..2378664ca8155 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -6000,10 +6000,9 @@ emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
   Register ScratchReg1 = XSrc;
   if (Idx >= HalfSize) {
     ScratchReg1 = MRI.createVirtualRegister(RC);
-    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
+    BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_D), ScratchReg1)
         .addReg(XSrc)
-        .addReg(XSrc)
-        .addImm(1);
+        .addImm(14);
   }
 
   Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);

diff  --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index b06f6523e977c..f25e988b52dc9 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -250,84 +250,68 @@ define void @buildvector_v32i8(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4,
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 14
 ; CHECK-NEXT:    ld.b $a1, $sp, 72
 ; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 15
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    ld.b $a2, $sp, 80
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 0
-; CHECK-NEXT:    ld.b $a1, $sp, 80
-; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 1
 ; CHECK-NEXT:    ld.b $a1, $sp, 88
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 2
-; CHECK-NEXT:    ld.b $a1, $sp, 96
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 1
+; CHECK-NEXT:    ld.b $a2, $sp, 96
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 3
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 2
 ; CHECK-NEXT:    ld.b $a1, $sp, 104
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 4
-; CHECK-NEXT:    ld.b $a1, $sp, 112
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 3
+; CHECK-NEXT:    ld.b $a2, $sp, 112
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 5
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 4
 ; CHECK-NEXT:    ld.b $a1, $sp, 120
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 6
-; CHECK-NEXT:    ld.b $a1, $sp, 128
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 5
+; CHECK-NEXT:    ld.b $a2, $sp, 128
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 7
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 6
 ; CHECK-NEXT:    ld.b $a1, $sp, 136
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 8
-; CHECK-NEXT:    ld.b $a1, $sp, 144
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 7
+; CHECK-NEXT:    ld.b $a2, $sp, 144
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 9
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 8
 ; CHECK-NEXT:    ld.b $a1, $sp, 152
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 10
-; CHECK-NEXT:    ld.b $a1, $sp, 160
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 9
+; CHECK-NEXT:    ld.b $a2, $sp, 160
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 11
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 10
 ; CHECK-NEXT:    ld.b $a1, $sp, 168
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 12
-; CHECK-NEXT:    ld.b $a1, $sp, 176
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 11
+; CHECK-NEXT:    ld.b $a2, $sp, 176
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 13
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 12
 ; CHECK-NEXT:    ld.b $a1, $sp, 184
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 13
+; CHECK-NEXT:    ld.b $a2, $sp, 192
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 14
-; CHECK-NEXT:    ld.b $a1, $sp, 192
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.b $vr1, $a1, 15
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 15
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
@@ -371,8 +355,15 @@ entry:
 define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7, i16 %a8, i16 %a9, i16 %a10, i16 %a11, i16 %a12, i16 %a13, i16 %a14, i16 %a15) nounwind {
 ; CHECK-LABEL: buildvector_v16i16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    ld.h $t0, $sp, 8
-; CHECK-NEXT:    ld.h $t1, $sp, 0
+; CHECK-NEXT:    ld.h $t0, $sp, 64
+; CHECK-NEXT:    ld.h $t1, $sp, 56
+; CHECK-NEXT:    ld.h $t2, $sp, 48
+; CHECK-NEXT:    ld.h $t3, $sp, 40
+; CHECK-NEXT:    ld.h $t4, $sp, 32
+; CHECK-NEXT:    ld.h $t5, $sp, 24
+; CHECK-NEXT:    ld.h $t6, $sp, 16
+; CHECK-NEXT:    ld.h $t7, $sp, 8
+; CHECK-NEXT:    ld.h $t8, $sp, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
@@ -380,45 +371,30 @@ define void @buildvector_v16i16(ptr %dst, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i1
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
 ; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
-; CHECK-NEXT:    vinsgr2vr.h $vr0, $t1, 7
-; CHECK-NEXT:    ld.h $a1, $sp, 16
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $t0, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $t8, 7
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t7, 0
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a2, $sp, 24
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t6, 1
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 32
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 2
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t5, 2
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a2, $sp, 40
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t4, 3
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 48
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 4
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t3, 4
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a2, $sp, 56
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t2, 5
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    ld.h $a1, $sp, 64
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 6
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t1, 6
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
-; CHECK-NEXT:    vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $t0, 7
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret

diff  --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
index 25106b456d2f7..3a4f6efd2c893 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/insertelement.ll
@@ -18,8 +18,7 @@ define void @insert_32xi8_upper(ptr %src, ptr %dst, i8 %in) nounwind {
 ; CHECK-LABEL: insert_32xi8_upper:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
 ; CHECK-NEXT:    vinsgr2vr.b $vr1, $a2, 0
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvst $xr0, $a1, 0
@@ -47,8 +46,7 @@ define void @insert_16xi16_upper(ptr %src, ptr %dst, i16 %in) nounwind {
 ; CHECK-LABEL: insert_16xi16_upper:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvori.b $xr1, $xr0, 0
-; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a2, 0
 ; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 2
 ; CHECK-NEXT:    xvst $xr0, $a1, 0


        


More information about the llvm-commits mailing list