[llvm] [LoongArch][NFC] Pre-commit tests for half width vector building optimization (PR #162985)

via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 11 02:56:03 PDT 2025


https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/162985

>From 0fdc99f24b185d2b36cc45fafd3384fe6352a4cb Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Sat, 11 Oct 2025 17:25:44 +0800
Subject: [PATCH 1/2] [LoongArch][NFC] Pre-commit tests for half width vector
 building optimization

---
 .../lasx/build-halfvec-extractvec.ll          | 451 ++++++++++++++++++
 1 file changed, 451 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll

diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll b/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll
new file mode 100644
index 0000000000000..2e3a9f69cd01c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll
@@ -0,0 +1,451 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @buildvector_ext32107654ba98fedc(ptr %dst, ptr %src) nounwind { ; Reverses each group of four i8 lanes in the low 16 lanes of a <32 x i8> load and stores them as <16 x i8>; baseline lowering moves every lane through a GPR.
+; CHECK-LABEL: buildvector_ext32107654ba98fedc:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    vpickve2gr.b $a2, $vr0, 2
+; CHECK-NEXT:    vpickve2gr.b $a3, $vr0, 1
+; CHECK-NEXT:    vpickve2gr.b $a4, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a5, $vr0, 7
+; CHECK-NEXT:    vpickve2gr.b $a6, $vr0, 6
+; CHECK-NEXT:    vpickve2gr.b $a7, $vr0, 5
+; CHECK-NEXT:    vpickve2gr.b $t0, $vr0, 4
+; CHECK-NEXT:    vpickve2gr.b $t1, $vr0, 11
+; CHECK-NEXT:    vpickve2gr.b $t2, $vr0, 10
+; CHECK-NEXT:    vpickve2gr.b $t3, $vr0, 9
+; CHECK-NEXT:    vpickve2gr.b $t4, $vr0, 8
+; CHECK-NEXT:    vpickve2gr.b $t5, $vr0, 15
+; CHECK-NEXT:    vpickve2gr.b $t6, $vr0, 14
+; CHECK-NEXT:    vpickve2gr.b $t7, $vr0, 13
+; CHECK-NEXT:    vpickve2gr.b $t8, $vr0, 12
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 2
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a4, 3
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a5, 4
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a6, 5
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a7, 6
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t0, 7
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t1, 8
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t2, 9
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t3, 10
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t4, 11
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t5, 12
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t6, 13
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t7, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t8, 15
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <32 x i8>, ptr %src
+  %e0 = extractelement <32 x i8> %v, i32 3
+  %e1 = extractelement <32 x i8> %v, i32 2
+  %e2 = extractelement <32 x i8> %v, i32 1
+  %e3 = extractelement <32 x i8> %v, i32 0
+  %e4 = extractelement <32 x i8> %v, i32 7
+  %e5 = extractelement <32 x i8> %v, i32 6
+  %e6 = extractelement <32 x i8> %v, i32 5
+  %e7 = extractelement <32 x i8> %v, i32 4
+  %e8 = extractelement <32 x i8> %v, i32 11
+  %e9 = extractelement <32 x i8> %v, i32 10
+  %e10 = extractelement <32 x i8> %v, i32 9
+  %e11 = extractelement <32 x i8> %v, i32 8
+  %e12 = extractelement <32 x i8> %v, i32 15
+  %e13 = extractelement <32 x i8> %v, i32 14
+  %e14 = extractelement <32 x i8> %v, i32 13
+  %e15 = extractelement <32 x i8> %v, i32 12
+  %ins0 = insertelement <16 x i8> undef, i8 %e0, i32 0
+  %ins1 = insertelement <16 x i8> %ins0, i8 %e1, i32 1
+  %ins2 = insertelement <16 x i8> %ins1, i8 %e2, i32 2
+  %ins3 = insertelement <16 x i8> %ins2, i8 %e3, i32 3
+  %ins4 = insertelement <16 x i8> %ins3, i8 %e4, i32 4
+  %ins5 = insertelement <16 x i8> %ins4, i8 %e5, i32 5
+  %ins6 = insertelement <16 x i8> %ins5, i8 %e6, i32 6
+  %ins7 = insertelement <16 x i8> %ins6, i8 %e7, i32 7
+  %ins8 = insertelement <16 x i8> %ins7, i8 %e8, i32 8
+  %ins9 = insertelement <16 x i8> %ins8, i8 %e9, i32 9
+  %ins10 = insertelement <16 x i8> %ins9, i8 %e10, i32 10
+  %ins11 = insertelement <16 x i8> %ins10, i8 %e11, i32 11
+  %ins12 = insertelement <16 x i8> %ins11, i8 %e12, i32 12
+  %ins13 = insertelement <16 x i8> %ins12, i8 %e13, i32 13
+  %ins14 = insertelement <16 x i8> %ins13, i8 %e14, i32 14
+  %ins15 = insertelement <16 x i8> %ins14, i8 %e15, i32 15
+  store <16 x i8> %ins15, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext13579bdfx13579bdf(ptr %dst, ptr %src) nounwind { ; Gathers the odd-indexed i8 lanes (1, 3, ..., 31) of a <32 x i8> load into a <16 x i8> store; baseline lowering uses sixteen GPR picks, the last eight after an xvpermi.d.
+; CHECK-LABEL: buildvector_ext13579bdfx13579bdf:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    vpickve2gr.b $a2, $vr0, 3
+; CHECK-NEXT:    vpickve2gr.b $a3, $vr0, 5
+; CHECK-NEXT:    vpickve2gr.b $a4, $vr0, 7
+; CHECK-NEXT:    vpickve2gr.b $a5, $vr0, 9
+; CHECK-NEXT:    vpickve2gr.b $a6, $vr0, 11
+; CHECK-NEXT:    vpickve2gr.b $a7, $vr0, 13
+; CHECK-NEXT:    vpickve2gr.b $t0, $vr0, 15
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 14
+; CHECK-NEXT:    vpickve2gr.b $t1, $vr0, 1
+; CHECK-NEXT:    vpickve2gr.b $t2, $vr0, 3
+; CHECK-NEXT:    vpickve2gr.b $t3, $vr0, 5
+; CHECK-NEXT:    vpickve2gr.b $t4, $vr0, 7
+; CHECK-NEXT:    vpickve2gr.b $t5, $vr0, 9
+; CHECK-NEXT:    vpickve2gr.b $t6, $vr0, 11
+; CHECK-NEXT:    vpickve2gr.b $t7, $vr0, 13
+; CHECK-NEXT:    vpickve2gr.b $t8, $vr0, 15
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a3, 2
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a4, 3
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a5, 4
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a6, 5
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $a7, 6
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t0, 7
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t1, 8
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t2, 9
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t3, 10
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t4, 11
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t5, 12
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t6, 13
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t7, 14
+; CHECK-NEXT:    vinsgr2vr.b $vr0, $t8, 15
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <32 x i8>, ptr %src
+  %e0 = extractelement <32 x i8> %v, i32 1
+  %e1 = extractelement <32 x i8> %v, i32 3
+  %e2 = extractelement <32 x i8> %v, i32 5
+  %e3 = extractelement <32 x i8> %v, i32 7
+  %e4 = extractelement <32 x i8> %v, i32 9
+  %e5 = extractelement <32 x i8> %v, i32 11
+  %e6 = extractelement <32 x i8> %v, i32 13
+  %e7 = extractelement <32 x i8> %v, i32 15
+  %e8 = extractelement <32 x i8> %v, i32 17
+  %e9 = extractelement <32 x i8> %v, i32 19
+  %e10 = extractelement <32 x i8> %v, i32 21
+  %e11 = extractelement <32 x i8> %v, i32 23
+  %e12 = extractelement <32 x i8> %v, i32 25
+  %e13 = extractelement <32 x i8> %v, i32 27
+  %e14 = extractelement <32 x i8> %v, i32 29
+  %e15 = extractelement <32 x i8> %v, i32 31
+  %ins0 = insertelement <16 x i8> undef, i8 %e0, i32 0
+  %ins1 = insertelement <16 x i8> %ins0, i8 %e1, i32 1
+  %ins2 = insertelement <16 x i8> %ins1, i8 %e2, i32 2
+  %ins3 = insertelement <16 x i8> %ins2, i8 %e3, i32 3
+  %ins4 = insertelement <16 x i8> %ins3, i8 %e4, i32 4
+  %ins5 = insertelement <16 x i8> %ins4, i8 %e5, i32 5
+  %ins6 = insertelement <16 x i8> %ins5, i8 %e6, i32 6
+  %ins7 = insertelement <16 x i8> %ins6, i8 %e7, i32 7
+  %ins8 = insertelement <16 x i8> %ins7, i8 %e8, i32 8
+  %ins9 = insertelement <16 x i8> %ins8, i8 %e9, i32 9
+  %ins10 = insertelement <16 x i8> %ins9, i8 %e10, i32 10
+  %ins11 = insertelement <16 x i8> %ins10, i8 %e11, i32 11
+  %ins12 = insertelement <16 x i8> %ins11, i8 %e12, i32 12
+  %ins13 = insertelement <16 x i8> %ins12, i8 %e13, i32 13
+  %ins14 = insertelement <16 x i8> %ins13, i8 %e14, i32 14
+  %ins15 = insertelement <16 x i8> %ins14, i8 %e15, i32 15
+  store <16 x i8> %ins15, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext01234560(ptr %dst, ptr %src) nounwind { ; Copies i16 lanes 0..6 of a <16 x i16> load and repeats lane 0 in the last slot of the <8 x i16> store; baseline lowering reuses the first pick's GPR for that repeat.
+; CHECK-LABEL: buildvector_ext01234560:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $a2, $vr0, 1
+; CHECK-NEXT:    vpickve2gr.h $a3, $vr0, 2
+; CHECK-NEXT:    vpickve2gr.h $a4, $vr0, 3
+; CHECK-NEXT:    vpickve2gr.h $a5, $vr0, 4
+; CHECK-NEXT:    vpickve2gr.h $a6, $vr0, 5
+; CHECK-NEXT:    vpickve2gr.h $a7, $vr0, 6
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a4, 3
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 7
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <16 x i16>, ptr %src
+  %e0 = extractelement <16 x i16> %v, i32 0
+  %e1 = extractelement <16 x i16> %v, i32 1
+  %e2 = extractelement <16 x i16> %v, i32 2
+  %e3 = extractelement <16 x i16> %v, i32 3
+  %e4 = extractelement <16 x i16> %v, i32 4
+  %e5 = extractelement <16 x i16> %v, i32 5
+  %e6 = extractelement <16 x i16> %v, i32 6
+  %e7 = extractelement <16 x i16> %v, i32 0
+  %ins0 = insertelement <8 x i16> undef, i16 %e0, i32 0
+  %ins1 = insertelement <8 x i16> %ins0, i16 %e1, i32 1
+  %ins2 = insertelement <8 x i16> %ins1, i16 %e2, i32 2
+  %ins3 = insertelement <8 x i16> %ins2, i16 %e3, i32 3
+  %ins4 = insertelement <8 x i16> %ins3, i16 %e4, i32 4
+  %ins5 = insertelement <8 x i16> %ins4, i16 %e5, i32 5
+  %ins6 = insertelement <8 x i16> %ins5, i16 %e6, i32 6
+  %ins7 = insertelement <8 x i16> %ins6, i16 %e7, i32 7
+  store <8 x i16> %ins7, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext08192a3b(ptr %dst, ptr %src) nounwind { ; Interleaves i16 lanes 0..3 with lanes 8..11 of a <16 x i16> load into an <8 x i16> store; baseline lowering picks the odd result slots from $vr1 after an xvpermi.d.
+; CHECK-LABEL: buildvector_ext08192a3b:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 0
+; CHECK-NEXT:    xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT:    vpickve2gr.h $a2, $vr1, 0
+; CHECK-NEXT:    vpickve2gr.h $a3, $vr0, 1
+; CHECK-NEXT:    vpickve2gr.h $a4, $vr1, 1
+; CHECK-NEXT:    vpickve2gr.h $a5, $vr0, 2
+; CHECK-NEXT:    vpickve2gr.h $a6, $vr1, 2
+; CHECK-NEXT:    vpickve2gr.h $a7, $vr0, 3
+; CHECK-NEXT:    vpickve2gr.h $t0, $vr1, 3
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a2, 1
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a3, 2
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a4, 3
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a5, 4
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a6, 5
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a7, 6
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $t0, 7
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <16 x i16>, ptr %src
+  %e0 = extractelement <16 x i16> %v, i32 0
+  %e1 = extractelement <16 x i16> %v, i32 8
+  %e2 = extractelement <16 x i16> %v, i32 1
+  %e3 = extractelement <16 x i16> %v, i32 9
+  %e4 = extractelement <16 x i16> %v, i32 2
+  %e5 = extractelement <16 x i16> %v, i32 10
+  %e6 = extractelement <16 x i16> %v, i32 3
+  %e7 = extractelement <16 x i16> %v, i32 11
+  %ins0 = insertelement <8 x i16> undef, i16 %e0, i32 0
+  %ins1 = insertelement <8 x i16> %ins0, i16 %e1, i32 1
+  %ins2 = insertelement <8 x i16> %ins1, i16 %e2, i32 2
+  %ins3 = insertelement <8 x i16> %ins2, i16 %e3, i32 3
+  %ins4 = insertelement <8 x i16> %ins3, i16 %e4, i32 4
+  %ins5 = insertelement <8 x i16> %ins4, i16 %e5, i32 5
+  %ins6 = insertelement <8 x i16> %ins5, i16 %e6, i32 6
+  %ins7 = insertelement <8 x i16> %ins6, i16 %e7, i32 7
+  store <8 x i16> %ins7, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext0000(ptr %dst, ptr %src) nounwind { ; Splats i32 lane 0 of an <8 x i32> load into all four lanes of a <4 x i32> store; baseline loongarch32 folds it to ld.w + vreplgr2vr.w, while loongarch64 still loads the full vector.
+; LA32-LABEL: buildvector_ext0000:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a1, $a1, 0
+; LA32-NEXT:    vreplgr2vr.w $vr0, $a1
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: buildvector_ext0000:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvpickve2gr.w $a1, $xr0, 0
+; LA64-NEXT:    vreplgr2vr.w $vr0, $a1
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %v = load <8 x i32>, ptr %src
+  %e0 = extractelement <8 x i32> %v, i32 0
+  %e1 = extractelement <8 x i32> %v, i32 0
+  %e2 = extractelement <8 x i32> %v, i32 0
+  %e3 = extractelement <8 x i32> %v, i32 0
+  %ins0 = insertelement <4 x i32> undef, i32 %e0, i32 0
+  %ins1 = insertelement <4 x i32> %ins0, i32 %e1, i32 1
+  %ins2 = insertelement <4 x i32> %ins1, i32 %e2, i32 2
+  %ins3 = insertelement <4 x i32> %ins2, i32 %e3, i32 3
+  store <4 x i32> %ins3, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext7610(ptr %dst, ptr %src) nounwind { ; Builds a <4 x i32> from lanes 7, 6, 1, 0 of an <8 x i32> load; baseline lowering is four xvpickve2gr.w picks feeding four vinsgr2vr.w inserts.
+; CHECK-LABEL: buildvector_ext7610:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 7
+; CHECK-NEXT:    xvpickve2gr.w $a2, $xr0, 6
+; CHECK-NEXT:    xvpickve2gr.w $a3, $xr0, 1
+; CHECK-NEXT:    xvpickve2gr.w $a4, $xr0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a1, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a2, 1
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a3, 2
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a4, 3
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <8 x i32>, ptr %src
+  %e0 = extractelement <8 x i32> %v, i32 7
+  %e1 = extractelement <8 x i32> %v, i32 6
+  %e2 = extractelement <8 x i32> %v, i32 1
+  %e3 = extractelement <8 x i32> %v, i32 0
+  %ins0 = insertelement <4 x i32> undef, i32 %e0, i32 0
+  %ins1 = insertelement <4 x i32> %ins0, i32 %e1, i32 1
+  %ins2 = insertelement <4 x i32> %ins1, i32 %e2, i32 2
+  %ins3 = insertelement <4 x i32> %ins2, i32 %e3, i32 3
+  store <4 x i32> %ins3, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext0113(ptr %dst, ptr %src) nounwind { ; Builds a <4 x float> from lanes 0, 1, 1, 3 of an <8 x float> load; baseline lowering uses two xvpickve.w followed by three vextrins.w.
+; CHECK-LABEL: buildvector_ext0113:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 1
+; CHECK-NEXT:    xvpickve.w $xr2, $xr0, 3
+; CHECK-NEXT:    vextrins.w $vr0, $vr1, 16
+; CHECK-NEXT:    vextrins.w $vr0, $vr1, 32
+; CHECK-NEXT:    vextrins.w $vr0, $vr2, 48
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <8 x float>, ptr %src
+  %e0 = extractelement <8 x float> %v, i32 0
+  %e1 = extractelement <8 x float> %v, i32 1
+  %e2 = extractelement <8 x float> %v, i32 1
+  %e3 = extractelement <8 x float> %v, i32 3
+  %ins0 = insertelement <4 x float> undef, float %e0, i32 0
+  %ins1 = insertelement <4 x float> %ins0, float %e1, i32 1
+  %ins2 = insertelement <4 x float> %ins1, float %e2, i32 2
+  %ins3 = insertelement <4 x float> %ins2, float %e3, i32 3
+  store <4 x float> %ins3, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext6060(ptr %dst, ptr %src) nounwind { ; Builds a <4 x float> by alternating lanes 6 and 0 of an <8 x float> load; baseline lowering is a five-instruction shuffle sequence ending in vreplvei.d.
+; CHECK-LABEL: buildvector_ext6060:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpickve.w $xr1, $xr0, 0
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 238
+; CHECK-NEXT:    xvrepl128vei.w $xr0, $xr0, 2
+; CHECK-NEXT:    vextrins.w $vr0, $vr1, 16
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 0
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <8 x float>, ptr %src
+  %e0 = extractelement <8 x float> %v, i32 6
+  %e1 = extractelement <8 x float> %v, i32 0
+  %e2 = extractelement <8 x float> %v, i32 6
+  %e3 = extractelement <8 x float> %v, i32 0
+  %ins0 = insertelement <4 x float> undef, float %e0, i32 0
+  %ins1 = insertelement <4 x float> %ins0, float %e1, i32 1
+  %ins2 = insertelement <4 x float> %ins1, float %e2, i32 2
+  %ins3 = insertelement <4 x float> %ins2, float %e3, i32 3
+  store <4 x float> %ins3, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext00(ptr %dst, ptr %src) nounwind { ; Splats i64 lane 0 of a <4 x i64> load into both lanes of a <2 x i64> store; baseline loongarch64 folds it to ld.d + vreplgr2vr.d, while loongarch32 assembles it from two 32-bit picks.
+; LA32-LABEL: buildvector_ext00:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 1
+; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a1, 1
+; LA32-NEXT:    vreplvei.d $vr0, $vr0, 0
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: buildvector_ext00:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a1, $a1, 0
+; LA64-NEXT:    vreplgr2vr.d $vr0, $a1
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %v = load <4 x i64>, ptr %src
+  %e0 = extractelement <4 x i64> %v, i32 0
+  %e1 = extractelement <4 x i64> %v, i32 0
+  %ins0 = insertelement <2 x i64> undef, i64 %e0, i32 0
+  %ins1 = insertelement <2 x i64> %ins0, i64 %e1, i32 1
+  store <2 x i64> %ins1, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext12(ptr %dst, ptr %src) nounwind { ; Builds a <2 x i64> from lanes 1 and 2 of a <4 x i64> load; baseline loongarch32 moves four 32-bit pieces through GPRs, loongarch64 moves two 64-bit lanes through GPRs.
+; LA32-LABEL: buildvector_ext12:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a1, 0
+; LA32-NEXT:    xvpickve2gr.w $a1, $xr0, 3
+; LA32-NEXT:    xvpickve2gr.w $a2, $xr0, 2
+; LA32-NEXT:    xvpickve2gr.w $a3, $xr0, 5
+; LA32-NEXT:    xvpickve2gr.w $a4, $xr0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a1, 1
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 3
+; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: buildvector_ext12:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a1, 0
+; LA64-NEXT:    xvpickve2gr.d $a1, $xr0, 1
+; LA64-NEXT:    xvpickve2gr.d $a2, $xr0, 2
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a1, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a2, 1
+; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  %v = load <4 x i64>, ptr %src
+  %e0 = extractelement <4 x i64> %v, i32 1
+  %e1 = extractelement <4 x i64> %v, i32 2
+  %ins0 = insertelement <2 x i64> undef, i64 %e0, i32 0
+  %ins1 = insertelement <2 x i64> %ins0, i64 %e1, i32 1
+  store <2 x i64> %ins1, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext10(ptr %dst, ptr %src) nounwind { ; Builds a <2 x double> from lanes 1 and 0 (swapped) of a <4 x double> load; baseline lowering is xvpickve.d + vreplvei.d + vextrins.d.
+; CHECK-LABEL: buildvector_ext10:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 0
+; CHECK-NEXT:    vreplvei.d $vr0, $vr0, 1
+; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <4 x double>, ptr %src
+  %e0 = extractelement <4 x double> %v, i32 1
+  %e1 = extractelement <4 x double> %v, i32 0
+  %ins0 = insertelement <2 x double> undef, double %e0, i32 0
+  %ins1 = insertelement <2 x double> %ins0, double %e1, i32 1
+  store <2 x double> %ins1, ptr %dst
+  ret void
+}
+
+define void @buildvector_ext31(ptr %dst, ptr %src) nounwind { ; Builds a <2 x double> from lanes 3 and 1 of a <4 x double> load; baseline lowering is xvpickve.d + xvpermi.d + vextrins.d.
+; CHECK-LABEL: buildvector_ext31:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a1, 0
+; CHECK-NEXT:    xvpickve.d $xr1, $xr0, 1
+; CHECK-NEXT:    xvpermi.d $xr0, $xr0, 3
+; CHECK-NEXT:    vextrins.d $vr0, $vr1, 16
+; CHECK-NEXT:    vst $vr0, $a0, 0
+; CHECK-NEXT:    ret
+entry:
+  %v = load <4 x double>, ptr %src
+  %e0 = extractelement <4 x double> %v, i32 3
+  %e1 = extractelement <4 x double> %v, i32 1
+  %ins0 = insertelement <2 x double> undef, double %e0, i32 0
+  %ins1 = insertelement <2 x double> %ins0, double %e1, i32 1
+  store <2 x double> %ins1, ptr %dst
+  ret void
+}

>From 5796c8da45e07d41a0a3034d41c4fd19f28c68d2 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoqi01 at loongson.cn>
Date: Sat, 11 Oct 2025 17:54:00 +0800
Subject: [PATCH 2/2] using poison

---
 .../lasx/build-halfvec-extractvec.ll          | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll b/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll
index 2e3a9f69cd01c..1d19c4b243a7a 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-halfvec-extractvec.ll
@@ -58,7 +58,7 @@ entry:
   %e13 = extractelement <32 x i8> %v, i32 14
   %e14 = extractelement <32 x i8> %v, i32 13
   %e15 = extractelement <32 x i8> %v, i32 12
-  %ins0 = insertelement <16 x i8> undef, i8 %e0, i32 0
+  %ins0 = insertelement <16 x i8> poison, i8 %e0, i32 0
   %ins1 = insertelement <16 x i8> %ins0, i8 %e1, i32 1
   %ins2 = insertelement <16 x i8> %ins1, i8 %e2, i32 2
   %ins3 = insertelement <16 x i8> %ins2, i8 %e3, i32 3
@@ -135,7 +135,7 @@ entry:
   %e13 = extractelement <32 x i8> %v, i32 27
   %e14 = extractelement <32 x i8> %v, i32 29
   %e15 = extractelement <32 x i8> %v, i32 31
-  %ins0 = insertelement <16 x i8> undef, i8 %e0, i32 0
+  %ins0 = insertelement <16 x i8> poison, i8 %e0, i32 0
   %ins1 = insertelement <16 x i8> %ins0, i8 %e1, i32 1
   %ins2 = insertelement <16 x i8> %ins1, i8 %e2, i32 2
   %ins3 = insertelement <16 x i8> %ins2, i8 %e3, i32 3
@@ -186,7 +186,7 @@ entry:
   %e5 = extractelement <16 x i16> %v, i32 5
   %e6 = extractelement <16 x i16> %v, i32 6
   %e7 = extractelement <16 x i16> %v, i32 0
-  %ins0 = insertelement <8 x i16> undef, i16 %e0, i32 0
+  %ins0 = insertelement <8 x i16> poison, i16 %e0, i32 0
   %ins1 = insertelement <8 x i16> %ins0, i16 %e1, i32 1
   %ins2 = insertelement <8 x i16> %ins1, i16 %e2, i32 2
   %ins3 = insertelement <8 x i16> %ins2, i16 %e3, i32 3
@@ -231,7 +231,7 @@ entry:
   %e5 = extractelement <16 x i16> %v, i32 10
   %e6 = extractelement <16 x i16> %v, i32 3
   %e7 = extractelement <16 x i16> %v, i32 11
-  %ins0 = insertelement <8 x i16> undef, i16 %e0, i32 0
+  %ins0 = insertelement <8 x i16> poison, i16 %e0, i32 0
   %ins1 = insertelement <8 x i16> %ins0, i16 %e1, i32 1
   %ins2 = insertelement <8 x i16> %ins1, i16 %e2, i32 2
   %ins3 = insertelement <8 x i16> %ins2, i16 %e3, i32 3
@@ -264,7 +264,7 @@ entry:
   %e1 = extractelement <8 x i32> %v, i32 0
   %e2 = extractelement <8 x i32> %v, i32 0
   %e3 = extractelement <8 x i32> %v, i32 0
-  %ins0 = insertelement <4 x i32> undef, i32 %e0, i32 0
+  %ins0 = insertelement <4 x i32> poison, i32 %e0, i32 0
   %ins1 = insertelement <4 x i32> %ins0, i32 %e1, i32 1
   %ins2 = insertelement <4 x i32> %ins1, i32 %e2, i32 2
   %ins3 = insertelement <4 x i32> %ins2, i32 %e3, i32 3
@@ -292,7 +292,7 @@ entry:
   %e1 = extractelement <8 x i32> %v, i32 6
   %e2 = extractelement <8 x i32> %v, i32 1
   %e3 = extractelement <8 x i32> %v, i32 0
-  %ins0 = insertelement <4 x i32> undef, i32 %e0, i32 0
+  %ins0 = insertelement <4 x i32> poison, i32 %e0, i32 0
   %ins1 = insertelement <4 x i32> %ins0, i32 %e1, i32 1
   %ins2 = insertelement <4 x i32> %ins1, i32 %e2, i32 2
   %ins3 = insertelement <4 x i32> %ins2, i32 %e3, i32 3
@@ -317,7 +317,7 @@ entry:
   %e1 = extractelement <8 x float> %v, i32 1
   %e2 = extractelement <8 x float> %v, i32 1
   %e3 = extractelement <8 x float> %v, i32 3
-  %ins0 = insertelement <4 x float> undef, float %e0, i32 0
+  %ins0 = insertelement <4 x float> poison, float %e0, i32 0
   %ins1 = insertelement <4 x float> %ins0, float %e1, i32 1
   %ins2 = insertelement <4 x float> %ins1, float %e2, i32 2
   %ins3 = insertelement <4 x float> %ins2, float %e3, i32 3
@@ -342,7 +342,7 @@ entry:
   %e1 = extractelement <8 x float> %v, i32 0
   %e2 = extractelement <8 x float> %v, i32 6
   %e3 = extractelement <8 x float> %v, i32 0
-  %ins0 = insertelement <4 x float> undef, float %e0, i32 0
+  %ins0 = insertelement <4 x float> poison, float %e0, i32 0
   %ins1 = insertelement <4 x float> %ins0, float %e1, i32 1
   %ins2 = insertelement <4 x float> %ins1, float %e2, i32 2
   %ins3 = insertelement <4 x float> %ins2, float %e3, i32 3
@@ -372,7 +372,7 @@ entry:
   %v = load <4 x i64>, ptr %src
   %e0 = extractelement <4 x i64> %v, i32 0
   %e1 = extractelement <4 x i64> %v, i32 0
-  %ins0 = insertelement <2 x i64> undef, i64 %e0, i32 0
+  %ins0 = insertelement <2 x i64> poison, i64 %e0, i32 0
   %ins1 = insertelement <2 x i64> %ins0, i64 %e1, i32 1
   store <2 x i64> %ins1, ptr %dst
   ret void
@@ -406,7 +406,7 @@ entry:
   %v = load <4 x i64>, ptr %src
   %e0 = extractelement <4 x i64> %v, i32 1
   %e1 = extractelement <4 x i64> %v, i32 2
-  %ins0 = insertelement <2 x i64> undef, i64 %e0, i32 0
+  %ins0 = insertelement <2 x i64> poison, i64 %e0, i32 0
   %ins1 = insertelement <2 x i64> %ins0, i64 %e1, i32 1
   store <2 x i64> %ins1, ptr %dst
   ret void
@@ -425,7 +425,7 @@ entry:
   %v = load <4 x double>, ptr %src
   %e0 = extractelement <4 x double> %v, i32 1
   %e1 = extractelement <4 x double> %v, i32 0
-  %ins0 = insertelement <2 x double> undef, double %e0, i32 0
+  %ins0 = insertelement <2 x double> poison, double %e0, i32 0
   %ins1 = insertelement <2 x double> %ins0, double %e1, i32 1
   store <2 x double> %ins1, ptr %dst
   ret void
@@ -444,7 +444,7 @@ entry:
   %v = load <4 x double>, ptr %src
   %e0 = extractelement <4 x double> %v, i32 3
   %e1 = extractelement <4 x double> %v, i32 1
-  %ins0 = insertelement <2 x double> undef, double %e0, i32 0
+  %ins0 = insertelement <2 x double> poison, double %e0, i32 0
   %ins1 = insertelement <2 x double> %ins0, double %e1, i32 1
   store <2 x double> %ins1, ptr %dst
   ret void



More information about the llvm-commits mailing list