[llvm] [LoongArch][NFC] Pre-commit tests for vector sign and zero extensions (PR #160809)

via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 25 21:58:12 PDT 2025


https://github.com/heiher created https://github.com/llvm/llvm-project/pull/160809

From aec52219a8b7c60e8d2dff2440b5c4c44596b377 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Fri, 26 Sep 2025 09:16:46 +0800
Subject: [PATCH] [LoongArch][NFC] Pre-commit tests for vector sign and zero
 extensions

---
 llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll | 1074 ++++++++++++++++
 llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll | 1206 ++++++++++++++++++
 llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll  |  443 +++++--
 llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll  |  356 ++++--
 4 files changed, 2919 insertions(+), 160 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll

diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
new file mode 100644
index 0000000000000..953e6c45608c0
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-sext.ll
@@ -0,0 +1,1074 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_2i8_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i8>, ptr %ptr
+  %B = sext <2 x i8> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_2i16_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i16>, ptr %ptr
+  %B = sext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_2i32_to_2i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vslli.d $vr0, $vr0, 32
+; LA32-NEXT:    vsrai.d $vr0, $vr0, 32
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_2i32_to_2i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vshuf4i.w $vr0, $vr0, 16
+; LA64-NEXT:    vslli.d $vr0, $vr0, 32
+; LA64-NEXT:    vsrai.d $vr0, $vr0, 32
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <2 x i32>, ptr %ptr
+  %B = sext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI4_0)
+; CHECK-NEXT:    xvld $xr0, $a2, %pc_lo12(.LCPI4_0)
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    xvpermi.d $xr1, $xr1, 68
+; CHECK-NEXT:    xvshuf.b $xr0, $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvslli.d $xr0, $xr0, 56
+; CHECK-NEXT:    xvsrai.d $xr0, $xr0, 56
+; CHECK-NEXT:    xvst $xr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i16_to_4i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i16_to_4i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; LA32-NEXT:    vslli.w $vr0, $vr0, 16
+; LA32-NEXT:    vsrai.w $vr0, $vr0, 16
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_4i16_to_4i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; LA64-NEXT:    vslli.w $vr0, $vr0, 16
+; LA64-NEXT:    vsrai.w $vr0, $vr0, 16
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = sext <4 x i16> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i16_to_4i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI6_0)
+; LA32-NEXT:    xvld $xr0, $a3, %pc_lo12(.LCPI6_0)
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    xvpermi.d $xr1, $xr1, 68
+; LA32-NEXT:    xvshuf.h $xr0, $xr0, $xr1
+; LA32-NEXT:    xvslli.d $xr0, $xr0, 48
+; LA32-NEXT:    xvsrai.d $xr0, $xr0, 48
+; LA32-NEXT:    xvst $xr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_4i16_to_4i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI6_0)
+; LA64-NEXT:    xvld $xr0, $a2, %pc_lo12(.LCPI6_0)
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    xvpermi.d $xr1, $xr1, 68
+; LA64-NEXT:    xvshuf.h $xr0, $xr0, $xr1
+; LA64-NEXT:    xvslli.d $xr0, $xr0, 48
+; LA64-NEXT:    xvsrai.d $xr0, $xr0, 48
+; LA64-NEXT:    xvst $xr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = sext <4 x i16> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i32_to_4i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 2
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 35
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vori.b $vr2, $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT:    vextrins.w $vr2, $vr0, 33
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_4i32_to_4i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = sext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i16:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT:    vslli.h $vr0, $vr0, 8
+; LA32-NEXT:    vsrai.h $vr0, $vr0, 8
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i16:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT:    vslli.h $vr0, $vr0, 8
+; LA64-NEXT:    vsrai.h $vr0, $vr0, 8
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    pcalau12i $a3, %pc_hi20(.LCPI9_0)
+; LA32-NEXT:    xvld $xr0, $a3, %pc_lo12(.LCPI9_0)
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    xvpermi.d $xr1, $xr1, 68
+; LA32-NEXT:    xvshuf.b $xr0, $xr0, $xr1, $xr0
+; LA32-NEXT:    xvslli.w $xr0, $xr0, 24
+; LA32-NEXT:    xvsrai.w $xr0, $xr0, 24
+; LA32-NEXT:    xvst $xr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI9_0)
+; LA64-NEXT:    xvld $xr0, $a2, %pc_lo12(.LCPI9_0)
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    xvpermi.d $xr1, $xr1, 68
+; LA64-NEXT:    xvshuf.b $xr0, $xr0, $xr1, $xr0
+; LA64-NEXT:    xvslli.w $xr0, $xr0, 24
+; LA64-NEXT:    xvsrai.w $xr0, $xr0, 24
+; LA64-NEXT:    xvst $xr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    xvpermi.d $xr1, $xr0, 68
+; LA32-NEXT:    # kill: def $vr0 killed $vr0 killed $xr0
+; LA32-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI10_0)
+; LA32-NEXT:    xvld $xr2, $a2, %pc_lo12(.LCPI10_0)
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vreplvei.w $vr0, $vr0, 1
+; LA32-NEXT:    xvpermi.d $xr0, $xr0, 68
+; LA32-NEXT:    xvshuf.b $xr0, $xr0, $xr0, $xr2
+; LA32-NEXT:    xvslli.d $xr0, $xr0, 56
+; LA32-NEXT:    xvsrai.d $xr0, $xr0, 56
+; LA32-NEXT:    xvshuf.b $xr1, $xr0, $xr1, $xr2
+; LA32-NEXT:    xvslli.d $xr1, $xr1, 56
+; LA32-NEXT:    xvsrai.d $xr1, $xr1, 56
+; LA32-NEXT:    xvst $xr1, $a1, 0
+; LA32-NEXT:    xvst $xr0, $a1, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    pcalau12i $a2, %pc_hi20(.LCPI10_0)
+; LA64-NEXT:    xvld $xr0, $a2, %pc_lo12(.LCPI10_0)
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vsrli.d $vr2, $vr1, 32
+; LA64-NEXT:    xvpermi.d $xr2, $xr2, 68
+; LA64-NEXT:    xvshuf.b $xr2, $xr0, $xr2, $xr0
+; LA64-NEXT:    xvslli.d $xr2, $xr2, 56
+; LA64-NEXT:    xvsrai.d $xr2, $xr2, 56
+; LA64-NEXT:    xvpermi.d $xr1, $xr1, 68
+; LA64-NEXT:    xvshuf.b $xr0, $xr0, $xr1, $xr0
+; LA64-NEXT:    xvslli.d $xr0, $xr0, 56
+; LA64-NEXT:    xvsrai.d $xr0, $xr0, 56
+; LA64-NEXT:    xvst $xr0, $a1, 0
+; LA64-NEXT:    xvst $xr2, $a1, 32
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT:    xvst $xr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i16_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 1
+; LA32-NEXT:    vpickve2gr.h $a2, $vr0, 3
+; LA32-NEXT:    ext.w.h $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 3
+; LA32-NEXT:    vpickve2gr.h $a3, $vr0, 0
+; LA32-NEXT:    ext.w.h $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT:    vpickve2gr.h $a4, $vr0, 1
+; LA32-NEXT:    ext.w.h $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 1
+; LA32-NEXT:    vpickve2gr.h $a2, $vr0, 7
+; LA32-NEXT:    ext.w.h $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a3, 3
+; LA32-NEXT:    vpickve2gr.h $a3, $vr0, 4
+; LA32-NEXT:    ext.w.h $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 0
+; LA32-NEXT:    vpickve2gr.h $a4, $vr0, 5
+; LA32-NEXT:    ext.w.h $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr3, $xr1, 2
+; LA32-NEXT:    xvst $xr3, $a1, 32
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i16_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr1, 2
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i32_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT:    vextrins.w $vr2, $vr1, 2
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 2
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT:    vextrins.w $vr2, $vr1, 35
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 3
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 0
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vori.b $vr3, $vr1, 0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 1
+; LA32-NEXT:    vextrins.w $vr3, $vr1, 33
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 1
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 2
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    vextrins.w $vr1, $vr0, 35
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vori.b $vr2, $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT:    vextrins.w $vr2, $vr0, 33
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT:    srai.w $a0, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    xvst $xr3, $a1, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i32_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 2
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 3
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 1
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i32>, ptr %ptr
+  %B = sext <8 x i32> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 9
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 10
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 11
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 12
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 4
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 13
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 5
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 14
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 6
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 7
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 4
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 5
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 6
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 7
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT:    xvst $xr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i16>
+  store <16 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i8_to_16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 12
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 13
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 14
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 9
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 10
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 11
+; CHECK-NEXT:    ext.w.b $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr3, $xr1, 2
+; CHECK-NEXT:    xvst $xr3, $a1, 32
+; CHECK-NEXT:    xvst $xr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i8_to_16i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_16i8_to_16i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 2
+; LA32-NEXT:    ext.w.b $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 1
+; LA32-NEXT:    vpickve2gr.b $a2, $vr1, 3
+; LA32-NEXT:    ext.w.b $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 3
+; LA32-NEXT:    vpickve2gr.b $a3, $vr1, 0
+; LA32-NEXT:    ext.w.b $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 0
+; LA32-NEXT:    vpickve2gr.b $a4, $vr1, 1
+; LA32-NEXT:    ext.w.b $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr0, $xr2, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 6
+; LA32-NEXT:    ext.w.b $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT:    vpickve2gr.b $a2, $vr1, 7
+; LA32-NEXT:    ext.w.b $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT:    vpickve2gr.b $a3, $vr1, 4
+; LA32-NEXT:    ext.w.b $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT:    vpickve2gr.b $a4, $vr1, 5
+; LA32-NEXT:    ext.w.b $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr3, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 10
+; LA32-NEXT:    ext.w.b $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT:    vpickve2gr.b $a2, $vr1, 11
+; LA32-NEXT:    ext.w.b $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT:    vpickve2gr.b $a3, $vr1, 8
+; LA32-NEXT:    ext.w.b $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a3, 0
+; LA32-NEXT:    vpickve2gr.b $a4, $vr1, 9
+; LA32-NEXT:    ext.w.b $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr4, $xr3, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 14
+; LA32-NEXT:    ext.w.b $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT:    vpickve2gr.b $a2, $vr1, 15
+; LA32-NEXT:    ext.w.b $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT:    vpickve2gr.b $a3, $vr1, 12
+; LA32-NEXT:    ext.w.b $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a3, 0
+; LA32-NEXT:    vpickve2gr.b $a4, $vr1, 13
+; LA32-NEXT:    ext.w.b $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr5, $xr3, 2
+; LA32-NEXT:    xvst $xr5, $a1, 96
+; LA32-NEXT:    xvst $xr4, $a1, 64
+; LA32-NEXT:    xvst $xr2, $a1, 32
+; LA32-NEXT:    xvst $xr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_16i8_to_16i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 10
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 11
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 8
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 9
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr4, $xr2, 2
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 14
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 12
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 13
+; LA64-NEXT:    ext.w.b $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr5, $xr2, 2
+; LA64-NEXT:    xvst $xr5, $a1, 96
+; LA64-NEXT:    xvst $xr4, $a1, 64
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    xvst $xr1, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = sext <16 x i8> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i16_to_16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xvld $xr0, $a0, 0
+; CHECK-NEXT:    xvpermi.q $xr1, $xr0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 4
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 5
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 6
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 7
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 0
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 1
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 2
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr1, 3
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr3, $xr2, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; CHECK-NEXT:    ext.w.h $a0, $a0
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT:    xvst $xr2, $a1, 0
+; CHECK-NEXT:    xvst $xr3, $a1, 32
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i16>, ptr %ptr
+  %B = sext <16 x i16> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_16i16_to_16i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr1, $a0, 0
+; LA32-NEXT:    xvpermi.q $xr3, $xr1, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 2
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 1
+; LA32-NEXT:    vpickve2gr.h $a2, $vr3, 3
+; LA32-NEXT:    ext.w.h $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 3
+; LA32-NEXT:    vpickve2gr.h $a3, $vr3, 0
+; LA32-NEXT:    ext.w.h $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 0
+; LA32-NEXT:    vpickve2gr.h $a4, $vr3, 1
+; LA32-NEXT:    ext.w.h $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr0, $xr2, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 6
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a2, 1
+; LA32-NEXT:    vpickve2gr.h $a2, $vr3, 7
+; LA32-NEXT:    ext.w.h $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a3, 3
+; LA32-NEXT:    vpickve2gr.h $a3, $vr3, 4
+; LA32-NEXT:    ext.w.h $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT:    vpickve2gr.h $a4, $vr3, 5
+; LA32-NEXT:    ext.w.h $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr4, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 2
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT:    vpickve2gr.h $a2, $vr1, 3
+; LA32-NEXT:    ext.w.h $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT:    vpickve2gr.h $a3, $vr1, 0
+; LA32-NEXT:    ext.w.h $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a3, 0
+; LA32-NEXT:    vpickve2gr.h $a4, $vr1, 1
+; LA32-NEXT:    ext.w.h $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr4, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr4, $xr3, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 6
+; LA32-NEXT:    ext.w.h $a0, $a0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT:    srai.w $a2, $a0, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 1
+; LA32-NEXT:    vpickve2gr.h $a2, $vr1, 7
+; LA32-NEXT:    ext.w.h $a2, $a2
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a2, 2
+; LA32-NEXT:    srai.w $a3, $a2, 31
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a3, 3
+; LA32-NEXT:    vpickve2gr.h $a3, $vr1, 4
+; LA32-NEXT:    ext.w.h $a3, $a3
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a3, 0
+; LA32-NEXT:    vpickve2gr.h $a4, $vr1, 5
+; LA32-NEXT:    ext.w.h $a4, $a4
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a4, 1
+; LA32-NEXT:    srai.w $a3, $a3, 31
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a3, 1
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a4, 2
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a2, 3
+; LA32-NEXT:    srai.w $a0, $a4, 31
+; LA32-NEXT:    vinsgr2vr.w $vr5, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr5, $xr3, 2
+; LA32-NEXT:    xvst $xr5, $a1, 32
+; LA32-NEXT:    xvst $xr4, $a1, 0
+; LA32-NEXT:    xvst $xr2, $a1, 96
+; LA32-NEXT:    xvst $xr0, $a1, 64
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_16i16_to_16i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr2, $xr0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 2
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 3
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 0
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 1
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr1, $xr3, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 6
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 7
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 4
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 5
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr4, $xr3, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT:    ext.w.h $a0, $a0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr5, $xr2, 2
+; LA64-NEXT:    xvst $xr5, $a1, 32
+; LA64-NEXT:    xvst $xr3, $a1, 0
+; LA64-NEXT:    xvst $xr4, $a1, 96
+; LA64-NEXT:    xvst $xr1, $a1, 64
+; LA64-NEXT:    ret
+entry:
+  %A = load <16 x i16>, ptr %ptr
+  %B = sext <16 x i16> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
new file mode 100644
index 0000000000000..f0548cc9e32f4
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vec-zext.ll
@@ -0,0 +1,1206 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
+
+define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_2i8_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.h $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.h $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i8>, ptr %ptr
+  %B = zext <2 x i8> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_2i16_to_2i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <2 x i16>, ptr %ptr
+  %B = zext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_2i32_to_2i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vrepli.b $vr0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_2i32_to_2i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vrepli.b $vr1, 0
+; LA64-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <2 x i32>, ptr %ptr
+  %B = zext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    xvrepli.b $xr1, 0
+; CHECK-NEXT:    xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 18
+; CHECK-NEXT:    xvextrins.b $xr1, $xr2, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT:    xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 18
+; CHECK-NEXT:    xvextrins.b $xr1, $xr2, 136
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT:    xvreplgr2vr.b $xr2, $a0
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 48
+; CHECK-NEXT:    xvextrins.b $xr1, $xr2, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT:    xvreplgr2vr.b $xr0, $a0
+; CHECK-NEXT:    xvpermi.q $xr0, $xr1, 48
+; CHECK-NEXT:    xvextrins.b $xr1, $xr0, 136
+; CHECK-NEXT:    xvst $xr1, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i16_to_4i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i16_to_4i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vrepli.b $vr1, 0
+; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_4i16_to_4i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vrepli.b $vr1, 0
+; LA64-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = zext <4 x i16> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i16_to_4i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT:    xvrepli.b $xr1, 0
+; LA32-NEXT:    xvreplgr2vr.h $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.h $xr1, $xr2, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    xvreplgr2vr.h $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.h $xr1, $xr2, 68
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT:    xvreplgr2vr.h $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT:    xvextrins.h $xr1, $xr2, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT:    xvreplgr2vr.h $xr0, $a0
+; LA32-NEXT:    xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT:    xvextrins.h $xr1, $xr0, 68
+; LA32-NEXT:    xvst $xr1, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_4i16_to_4i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    xvrepli.b $xr1, 0
+; LA64-NEXT:    xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT:    xvextrins.h $xr1, $xr2, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT:    xvextrins.h $xr1, $xr2, 68
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    xvreplgr2vr.h $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT:    xvextrins.h $xr1, $xr2, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    xvreplgr2vr.h $xr0, $a0
+; LA64-NEXT:    xvpermi.q $xr0, $xr1, 48
+; LA64-NEXT:    xvextrins.h $xr1, $xr0, 68
+; LA64-NEXT:    xvst $xr1, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = zext <4 x i16> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i32_to_4i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT:    xvrepli.b $xr1, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 2
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 4
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 6
+; LA32-NEXT:    xvst $xr1, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_4i32_to_4i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = zext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i16:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vrepli.b $vr1, 0
+; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i16:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vrepli.b $vr1, 0
+; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; LA32-NEXT:    xvrepli.b $xr1, 0
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 68
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 136
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 204
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 68
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 136
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; LA32-NEXT:    xvreplgr2vr.b $xr0, $a0
+; LA32-NEXT:    xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT:    xvextrins.b $xr1, $xr0, 204
+; LA32-NEXT:    xvst $xr1, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT:    xvrepli.b $xr1, 0
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 68
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 136
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 204
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 68
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; LA64-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 48
+; LA64-NEXT:    xvextrins.b $xr1, $xr2, 136
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; LA64-NEXT:    xvreplgr2vr.b $xr0, $a0
+; LA64-NEXT:    xvpermi.q $xr0, $xr1, 48
+; LA64-NEXT:    xvextrins.b $xr1, $xr0, 204
+; LA64-NEXT:    xvst $xr1, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vpickve2gr.b $a2, $vr0, 0
+; LA32-NEXT:    vpickve2gr.b $a3, $vr0, 1
+; LA32-NEXT:    vpickve2gr.b $a4, $vr0, 2
+; LA32-NEXT:    vpickve2gr.b $a5, $vr0, 3
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vreplvei.w $vr0, $vr0, 1
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; LA32-NEXT:    xvrepli.b $xr1, 0
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvori.b $xr3, $xr1, 0
+; LA32-NEXT:    xvextrins.b $xr3, $xr2, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; LA32-NEXT:    xvreplgr2vr.b $xr4, $a0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; LA32-NEXT:    xvreplgr2vr.b $xr0, $a0
+; LA32-NEXT:    xvpermi.q $xr2, $xr3, 18
+; LA32-NEXT:    xvextrins.b $xr3, $xr2, 136
+; LA32-NEXT:    xvreplgr2vr.b $xr2, $a2
+; LA32-NEXT:    xvpermi.q $xr4, $xr3, 48
+; LA32-NEXT:    xvextrins.b $xr3, $xr4, 0
+; LA32-NEXT:    xvreplgr2vr.b $xr4, $a3
+; LA32-NEXT:    xvpermi.q $xr0, $xr3, 48
+; LA32-NEXT:    xvextrins.b $xr3, $xr0, 136
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 18
+; LA32-NEXT:    xvextrins.b $xr1, $xr2, 0
+; LA32-NEXT:    xvpermi.q $xr4, $xr1, 18
+; LA32-NEXT:    xvextrins.b $xr1, $xr4, 136
+; LA32-NEXT:    xvreplgr2vr.b $xr0, $a4
+; LA32-NEXT:    xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT:    xvextrins.b $xr1, $xr0, 0
+; LA32-NEXT:    xvreplgr2vr.b $xr0, $a5
+; LA32-NEXT:    xvpermi.q $xr0, $xr1, 48
+; LA32-NEXT:    xvextrins.b $xr1, $xr0, 136
+; LA32-NEXT:    xvst $xr1, $a1, 0
+; LA32-NEXT:    xvst $xr3, $a1, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vsrli.d $vr1, $vr0, 32
+; LA64-NEXT:    vpickve2gr.b $a0, $vr1, 0
+; LA64-NEXT:    xvrepli.b $xr2, 0
+; LA64-NEXT:    xvreplgr2vr.b $xr3, $a0
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 18
+; LA64-NEXT:    xvori.b $xr4, $xr2, 0
+; LA64-NEXT:    xvextrins.b $xr4, $xr3, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr1, 1
+; LA64-NEXT:    xvreplgr2vr.b $xr3, $a0
+; LA64-NEXT:    xvpermi.q $xr3, $xr4, 18
+; LA64-NEXT:    xvextrins.b $xr4, $xr3, 136
+; LA64-NEXT:    vpickve2gr.b $a0, $vr1, 2
+; LA64-NEXT:    xvreplgr2vr.b $xr3, $a0
+; LA64-NEXT:    xvpermi.q $xr3, $xr4, 48
+; LA64-NEXT:    xvextrins.b $xr4, $xr3, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr1, 3
+; LA64-NEXT:    xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT:    xvpermi.q $xr1, $xr4, 48
+; LA64-NEXT:    xvextrins.b $xr4, $xr1, 136
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT:    xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT:    xvpermi.q $xr1, $xr2, 18
+; LA64-NEXT:    xvextrins.b $xr2, $xr1, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT:    xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT:    xvpermi.q $xr1, $xr2, 18
+; LA64-NEXT:    xvextrins.b $xr2, $xr1, 136
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT:    xvreplgr2vr.b $xr1, $a0
+; LA64-NEXT:    xvpermi.q $xr1, $xr2, 48
+; LA64-NEXT:    xvextrins.b $xr2, $xr1, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT:    xvreplgr2vr.b $xr0, $a0
+; LA64-NEXT:    xvpermi.q $xr0, $xr2, 48
+; LA64-NEXT:    xvextrins.b $xr2, $xr0, 136
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    xvst $xr4, $a1, 32
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i16_to_8i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i16_to_8i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = zext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i16_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT:    xvrepli.b $xr1, 0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvori.b $xr2, $xr1, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 4
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 6
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 4
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 6
+; LA32-NEXT:    xvst $xr1, $a1, 32
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i16_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr1, 2
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = zext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i32_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT:    xvrepli.b $xr2, 0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 0
+; LA32-NEXT:    xvori.b $xr3, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 1
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 2
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 2
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 4
+; LA32-NEXT:    vpickve2gr.w $a0, $vr1, 3
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 6
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 4
+; LA32-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 6
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    xvst $xr3, $a1, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i32_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr1, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.w $a0, $vr0, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i32>, ptr %ptr
+  %B = zext <8 x i32> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i16(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 9
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 10
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 11
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 12
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 4
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 13
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 5
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 14
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 6
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 7
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 4
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 5
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 6
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.h $vr2, $a0, 7
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT:    xvst $xr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = zext <16 x i8> %A to <16 x i16>
+  store <16 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_16i8_to_16i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 12
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 13
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 14
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 8
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 9
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 10
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 2
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 11
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; CHECK-NEXT:    xvpermi.q $xr3, $xr1, 2
+; CHECK-NEXT:    xvst $xr3, $a1, 32
+; CHECK-NEXT:    xvst $xr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = zext <16 x i8> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i8_to_16i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_16i8_to_16i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    vld $vr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 0
+; LA32-NEXT:    xvrepli.b $xr2, 0
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvori.b $xr0, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr0, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 1
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr0, $a0, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 2
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr0, $a0, 4
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 3
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr0, $a0, 6
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 4
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvori.b $xr3, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 5
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 6
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 4
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 7
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 6
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 8
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvori.b $xr4, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 9
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 10
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 4
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 11
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 6
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 12
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 13
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 14
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 4
+; LA32-NEXT:    vpickve2gr.b $a0, $vr1, 15
+; LA32-NEXT:    andi $a0, $a0, 255
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 6
+; LA32-NEXT:    xvst $xr2, $a1, 96
+; LA32-NEXT:    xvst $xr4, $a1, 64
+; LA32-NEXT:    xvst $xr3, $a1, 32
+; LA32-NEXT:    xvst $xr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_16i8_to_16i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 2
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 3
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 1
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr1, $xr2, 2
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 6
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 7
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 4
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 5
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 10
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 11
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 8
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 9
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr4, $xr2, 2
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 14
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 15
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 12
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 0
+; LA64-NEXT:    vpickve2gr.b $a0, $vr0, 13
+; LA64-NEXT:    andi $a0, $a0, 255
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr5, $xr2, 2
+; LA64-NEXT:    xvst $xr5, $a1, 96
+; LA64-NEXT:    xvst $xr4, $a1, 64
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    xvst $xr1, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <16 x i8>, ptr %ptr
+  %B = zext <16 x i8> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_16i16_to_16i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 5
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 6
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 7
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr1, 3
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA32-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT:    xvst $xr2, $a1, 0
+; LA32-NEXT:    xvst $xr3, $a1, 32
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_16i16_to_16i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr1, $xr0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 5
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 6
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 7
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr3, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr3, $a0, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr1, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr3, $a0, 3
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 3
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.w $vr2, $a0, 3
+; LA64-NEXT:    xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT:    xvst $xr2, $a1, 0
+; LA64-NEXT:    xvst $xr3, $a1, 32
+; LA64-NEXT:    ret
+entry:
+  %A = load <16 x i16>, ptr %ptr
+  %B = zext <16 x i16> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_16i16_to_16i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvpermi.q $xr3, $xr0, 1
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 0
+; LA32-NEXT:    xvrepli.b $xr2, 0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvori.b $xr1, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 4
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 3
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr1, $a0, 6
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvori.b $xr4, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 5
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 6
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 4
+; LA32-NEXT:    vpickve2gr.h $a0, $vr3, 7
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr4, $a0, 6
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvori.b $xr3, $xr2, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 4
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr3, $a0, 6
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 0
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 2
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 4
+; LA32-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
+; LA32-NEXT:    xvinsgr2vr.w $xr2, $a0, 6
+; LA32-NEXT:    xvst $xr2, $a1, 32
+; LA32-NEXT:    xvst $xr3, $a1, 0
+; LA32-NEXT:    xvst $xr4, $a1, 96
+; LA32-NEXT:    xvst $xr1, $a1, 64
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_16i16_to_16i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvpermi.q $xr2, $xr0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr1, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr1, $xr3, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 6
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 7
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr2, 5
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr4, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr4, $xr3, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 2
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 3
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 1
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr3, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr3, $xr2, 2
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 6
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 7
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr2, $a0, 1
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 4
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 0
+; LA64-NEXT:    vpickve2gr.h $a0, $vr0, 5
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; LA64-NEXT:    vinsgr2vr.d $vr5, $a0, 1
+; LA64-NEXT:    xvpermi.q $xr5, $xr2, 2
+; LA64-NEXT:    xvst $xr5, $a1, 32
+; LA64-NEXT:    xvst $xr3, $a1, 0
+; LA64-NEXT:    xvst $xr4, $a1, 96
+; LA64-NEXT:    xvst $xr1, $a1, 64
+; LA64-NEXT:    ret
+entry:
+  %A = load <16 x i16>, ptr %ptr
+  %B = zext <16 x i16> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
+  ret void
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
index dce6dc9f2aa37..cadaf2ffdc4f3 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-sext.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
 
 define void @load_sext_2i8_to_2i64(ptr %ptr, ptr %dst) {
@@ -21,68 +21,90 @@ entry:
   ret void
 }
 
-define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_4i8_to_4i32:
+define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_2i16_to_2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
 ; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
-; CHECK-NEXT:    vsrai.w $vr0, $vr0, 24
+; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <4 x i8>, ptr %ptr
-  %B = sext <4 x i8> %A to <4 x i32>
-  store <4 x i32> %B, ptr %dst
+  %A = load <2 x i16>, ptr %ptr
+  %B = sext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
   ret void
 }
 
-define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_8i8_to_8i16:
+define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_2i32_to_2i64:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; LA32-NEXT:    vslli.h $vr0, $vr0, 8
-; LA32-NEXT:    vsrai.h $vr0, $vr0, 8
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vslli.d $vr0, $vr0, 32
+; LA32-NEXT:    vsrai.d $vr0, $vr0, 32
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
-; LA64-LABEL: load_sext_8i8_to_8i16:
+; LA64-LABEL: load_sext_2i32_to_2i64:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vilvl.b $vr0, $vr0, $vr0
-; LA64-NEXT:    vslli.h $vr0, $vr0, 8
-; LA64-NEXT:    vsrai.h $vr0, $vr0, 8
+; LA64-NEXT:    vshuf4i.w $vr0, $vr0, 16
+; LA64-NEXT:    vslli.d $vr0, $vr0, 32
+; LA64-NEXT:    vsrai.d $vr0, $vr0, 32
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
-  %A = load <8 x i8>, ptr %ptr
-  %B = sext <8 x i8> %A to <8 x i16>
-  store <8 x i16> %B, ptr %dst
+  %A = load <2 x i32>, ptr %ptr
+  %B = sext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
   ret void
 }
 
-define void @load_sext_2i16_to_2i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_2i16_to_2i64:
+define void @load_sext_4i8_to_4i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
 ; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr0, $vr0
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
-; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 24
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 24
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <2 x i16>, ptr %ptr
-  %B = sext <2 x i16> %A to <2 x i64>
-  store <2 x i64> %B, ptr %dst
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i8_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i8_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.w $vr1, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 56
+; CHECK-NEXT:    vsrai.d $vr1, $vr1, 56
+; CHECK-NEXT:    vilvh.w $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 56
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 56
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr1, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = sext <4 x i8> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
   ret void
 }
 
@@ -115,31 +137,270 @@ entry:
   ret void
 }
 
-define void @load_sext_2i32_to_2i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_sext_2i32_to_2i64:
+define void @load_sext_4i16_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_4i16_to_4i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; LA32-NEXT:    vilvl.w $vr0, $vr0, $vr0
+; LA32-NEXT:    vslli.d $vr0, $vr0, 48
+; LA32-NEXT:    vsrai.d $vr0, $vr0, 48
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    vilvl.h $vr1, $vr1, $vr1
+; LA32-NEXT:    vilvl.w $vr1, $vr1, $vr1
+; LA32-NEXT:    vslli.d $vr1, $vr1, 48
+; LA32-NEXT:    vsrai.d $vr1, $vr1, 48
+; LA32-NEXT:    vst $vr1, $a1, 16
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_4i16_to_4i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; LA64-NEXT:    vilvl.w $vr1, $vr0, $vr0
+; LA64-NEXT:    vslli.d $vr1, $vr1, 48
+; LA64-NEXT:    vsrai.d $vr1, $vr1, 48
+; LA64-NEXT:    vilvh.w $vr0, $vr0, $vr0
+; LA64-NEXT:    vslli.d $vr0, $vr0, 48
+; LA64-NEXT:    vsrai.d $vr0, $vr0, 48
+; LA64-NEXT:    vst $vr0, $a1, 16
+; LA64-NEXT:    vst $vr1, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = sext <4 x i16> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_4i32_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vshuf4i.w $vr1, $vr0, 16
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 32
+; CHECK-NEXT:    vsrai.d $vr1, $vr1, 32
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 50
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 32
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr1, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = sext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i16(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i16:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
-; LA32-NEXT:    vslli.d $vr0, $vr0, 32
-; LA32-NEXT:    vsrai.d $vr0, $vr0, 32
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT:    vslli.h $vr0, $vr0, 8
+; LA32-NEXT:    vsrai.h $vr0, $vr0, 8
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
-; LA64-LABEL: load_sext_2i32_to_2i64:
+; LA64-LABEL: load_sext_8i8_to_8i16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
-; LA64-NEXT:    vshuf4i.w $vr0, $vr0, 16
-; LA64-NEXT:    vslli.d $vr0, $vr0, 32
-; LA64-NEXT:    vsrai.d $vr0, $vr0, 32
+; LA64-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT:    vslli.h $vr0, $vr0, 8
+; LA64-NEXT:    vsrai.h $vr0, $vr0, 8
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
-  %A = load <2 x i32>, ptr %ptr
-  %B = sext <2 x i32> %A to <2 x i64>
-  store <2 x i64> %B, ptr %dst
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT:    vilvl.h $vr0, $vr0, $vr0
+; LA32-NEXT:    vslli.w $vr0, $vr0, 24
+; LA32-NEXT:    vsrai.w $vr0, $vr0, 24
+; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 0
+; LA32-NEXT:    vilvl.b $vr1, $vr1, $vr1
+; LA32-NEXT:    vilvl.h $vr1, $vr1, $vr1
+; LA32-NEXT:    vslli.w $vr1, $vr1, 24
+; LA32-NEXT:    vsrai.w $vr1, $vr1, 24
+; LA32-NEXT:    vst $vr1, $a1, 16
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT:    vilvl.h $vr1, $vr0, $vr0
+; LA64-NEXT:    vslli.w $vr1, $vr1, 24
+; LA64-NEXT:    vsrai.w $vr1, $vr1, 24
+; LA64-NEXT:    vilvh.h $vr0, $vr0, $vr0
+; LA64-NEXT:    vslli.w $vr0, $vr0, 24
+; LA64-NEXT:    vsrai.w $vr0, $vr0, 24
+; LA64-NEXT:    vst $vr0, $a1, 16
+; LA64-NEXT:    vst $vr1, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i8_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_sext_8i8_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA32-NEXT:    vilvl.h $vr1, $vr0, $vr0
+; LA32-NEXT:    vilvl.w $vr2, $vr1, $vr1
+; LA32-NEXT:    vslli.d $vr2, $vr2, 56
+; LA32-NEXT:    vsrai.d $vr2, $vr2, 56
+; LA32-NEXT:    vilvh.w $vr1, $vr1, $vr1
+; LA32-NEXT:    vslli.d $vr1, $vr1, 56
+; LA32-NEXT:    vsrai.d $vr1, $vr1, 56
+; LA32-NEXT:    vilvh.h $vr0, $vr0, $vr0
+; LA32-NEXT:    vilvl.w $vr3, $vr0, $vr0
+; LA32-NEXT:    vslli.d $vr3, $vr3, 56
+; LA32-NEXT:    vsrai.d $vr3, $vr3, 56
+; LA32-NEXT:    vilvh.w $vr0, $vr0, $vr0
+; LA32-NEXT:    vslli.d $vr0, $vr0, 56
+; LA32-NEXT:    vsrai.d $vr0, $vr0, 56
+; LA32-NEXT:    vst $vr0, $a1, 48
+; LA32-NEXT:    vst $vr3, $a1, 32
+; LA32-NEXT:    vst $vr1, $a1, 16
+; LA32-NEXT:    vst $vr2, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_sext_8i8_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vilvl.b $vr0, $vr0, $vr0
+; LA64-NEXT:    vilvl.h $vr1, $vr0, $vr0
+; LA64-NEXT:    vilvl.w $vr2, $vr1, $vr1
+; LA64-NEXT:    vslli.d $vr2, $vr2, 56
+; LA64-NEXT:    vsrai.d $vr2, $vr2, 56
+; LA64-NEXT:    vilvh.w $vr1, $vr1, $vr1
+; LA64-NEXT:    vslli.d $vr1, $vr1, 56
+; LA64-NEXT:    vsrai.d $vr1, $vr1, 56
+; LA64-NEXT:    vilvh.h $vr0, $vr0, $vr0
+; LA64-NEXT:    vilvl.w $vr3, $vr0, $vr0
+; LA64-NEXT:    vslli.d $vr3, $vr3, 56
+; LA64-NEXT:    vsrai.d $vr3, $vr3, 56
+; LA64-NEXT:    vilvh.w $vr0, $vr0, $vr0
+; LA64-NEXT:    vslli.d $vr0, $vr0, 56
+; LA64-NEXT:    vsrai.d $vr0, $vr0, 56
+; LA64-NEXT:    vst $vr0, $a1, 48
+; LA64-NEXT:    vst $vr3, $a1, 32
+; LA64-NEXT:    vst $vr1, $a1, 16
+; LA64-NEXT:    vst $vr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = sext <8 x i8> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.h $vr1, $vr0, $vr0
+; CHECK-NEXT:    vslli.w $vr1, $vr1, 16
+; CHECK-NEXT:    vsrai.w $vr1, $vr1, 16
+; CHECK-NEXT:    vilvh.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.w $vr0, $vr0, 16
+; CHECK-NEXT:    vsrai.w $vr0, $vr0, 16
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr1, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_8i16_to_8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vilvl.h $vr1, $vr0, $vr0
+; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr1
+; CHECK-NEXT:    vslli.d $vr2, $vr2, 48
+; CHECK-NEXT:    vsrai.d $vr2, $vr2, 48
+; CHECK-NEXT:    vilvh.w $vr1, $vr1, $vr1
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 48
+; CHECK-NEXT:    vsrai.d $vr1, $vr1, 48
+; CHECK-NEXT:    vilvh.h $vr0, $vr0, $vr0
+; CHECK-NEXT:    vilvl.w $vr3, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 48
+; CHECK-NEXT:    vsrai.d $vr3, $vr3, 48
+; CHECK-NEXT:    vilvh.w $vr0, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT:    vst $vr0, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 16
+; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = sext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_sext_8i32_to_8i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_8i32_to_8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vld $vr1, $a0, 16
+; CHECK-NEXT:    vshuf4i.w $vr2, $vr0, 16
+; CHECK-NEXT:    vslli.d $vr2, $vr2, 32
+; CHECK-NEXT:    vsrai.d $vr2, $vr2, 32
+; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 50
+; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
+; CHECK-NEXT:    vsrai.d $vr0, $vr0, 32
+; CHECK-NEXT:    vshuf4i.w $vr3, $vr1, 16
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 32
+; CHECK-NEXT:    vsrai.d $vr3, $vr3, 32
+; CHECK-NEXT:    vshuf4i.w $vr1, $vr1, 50
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 32
+; CHECK-NEXT:    vsrai.d $vr1, $vr1, 32
+; CHECK-NEXT:    vst $vr1, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i32>, ptr %ptr
+  %B = sext <8 x i32> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
   ret void
 }
 
@@ -243,72 +504,80 @@ entry:
   ret void
 }
 
-define void @load_sext_8i16_to_8i32(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_8i16_to_8i32:
+define void @load_sext_16i16_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i16_to_16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vilvl.h $vr1, $vr0, $vr0
-; CHECK-NEXT:    vslli.w $vr1, $vr1, 16
-; CHECK-NEXT:    vsrai.w $vr1, $vr1, 16
+; CHECK-NEXT:    vld $vr1, $a0, 16
+; CHECK-NEXT:    vilvl.h $vr2, $vr0, $vr0
+; CHECK-NEXT:    vslli.w $vr2, $vr2, 16
+; CHECK-NEXT:    vsrai.w $vr2, $vr2, 16
 ; CHECK-NEXT:    vilvh.h $vr0, $vr0, $vr0
 ; CHECK-NEXT:    vslli.w $vr0, $vr0, 16
 ; CHECK-NEXT:    vsrai.w $vr0, $vr0, 16
+; CHECK-NEXT:    vilvl.h $vr3, $vr1, $vr1
+; CHECK-NEXT:    vslli.w $vr3, $vr3, 16
+; CHECK-NEXT:    vsrai.w $vr3, $vr3, 16
+; CHECK-NEXT:    vilvh.h $vr1, $vr1, $vr1
+; CHECK-NEXT:    vslli.w $vr1, $vr1, 16
+; CHECK-NEXT:    vsrai.w $vr1, $vr1, 16
+; CHECK-NEXT:    vst $vr1, $a1, 48
+; CHECK-NEXT:    vst $vr3, $a1, 32
 ; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr1, $a1, 0
+; CHECK-NEXT:    vst $vr2, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <8 x i16>, ptr %ptr
-  %B = sext <8 x i16> %A to <8 x i32>
-  store <8 x i32> %B, ptr %dst
+  %A = load <16 x i16>, ptr %ptr
+  %B = sext <16 x i16> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
   ret void
 }
 
-define void @load_sext_8i16_to_8i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_8i16_to_8i64:
+define void @load_sext_16i16_to_16i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_sext_16i16_to_16i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vilvl.h $vr1, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr1
+; CHECK-NEXT:    vld $vr1, $a0, 16
+; CHECK-NEXT:    vilvl.h $vr2, $vr0, $vr0
+; CHECK-NEXT:    vilvl.w $vr3, $vr2, $vr2
+; CHECK-NEXT:    vslli.d $vr3, $vr3, 48
+; CHECK-NEXT:    vsrai.d $vr3, $vr3, 48
+; CHECK-NEXT:    vilvh.w $vr2, $vr2, $vr2
 ; CHECK-NEXT:    vslli.d $vr2, $vr2, 48
 ; CHECK-NEXT:    vsrai.d $vr2, $vr2, 48
-; CHECK-NEXT:    vilvh.w $vr1, $vr1, $vr1
-; CHECK-NEXT:    vslli.d $vr1, $vr1, 48
-; CHECK-NEXT:    vsrai.d $vr1, $vr1, 48
 ; CHECK-NEXT:    vilvh.h $vr0, $vr0, $vr0
-; CHECK-NEXT:    vilvl.w $vr3, $vr0, $vr0
-; CHECK-NEXT:    vslli.d $vr3, $vr3, 48
-; CHECK-NEXT:    vsrai.d $vr3, $vr3, 48
+; CHECK-NEXT:    vilvl.w $vr4, $vr0, $vr0
+; CHECK-NEXT:    vslli.d $vr4, $vr4, 48
+; CHECK-NEXT:    vsrai.d $vr4, $vr4, 48
 ; CHECK-NEXT:    vilvh.w $vr0, $vr0, $vr0
 ; CHECK-NEXT:    vslli.d $vr0, $vr0, 48
 ; CHECK-NEXT:    vsrai.d $vr0, $vr0, 48
+; CHECK-NEXT:    vilvl.h $vr5, $vr1, $vr1
+; CHECK-NEXT:    vilvl.w $vr6, $vr5, $vr5
+; CHECK-NEXT:    vslli.d $vr6, $vr6, 48
+; CHECK-NEXT:    vsrai.d $vr6, $vr6, 48
+; CHECK-NEXT:    vilvh.w $vr5, $vr5, $vr5
+; CHECK-NEXT:    vslli.d $vr5, $vr5, 48
+; CHECK-NEXT:    vsrai.d $vr5, $vr5, 48
+; CHECK-NEXT:    vilvh.h $vr1, $vr1, $vr1
+; CHECK-NEXT:    vilvl.w $vr7, $vr1, $vr1
+; CHECK-NEXT:    vslli.d $vr7, $vr7, 48
+; CHECK-NEXT:    vsrai.d $vr7, $vr7, 48
+; CHECK-NEXT:    vilvh.w $vr1, $vr1, $vr1
+; CHECK-NEXT:    vslli.d $vr1, $vr1, 48
+; CHECK-NEXT:    vsrai.d $vr1, $vr1, 48
+; CHECK-NEXT:    vst $vr1, $a1, 112
+; CHECK-NEXT:    vst $vr7, $a1, 96
+; CHECK-NEXT:    vst $vr5, $a1, 80
+; CHECK-NEXT:    vst $vr6, $a1, 64
 ; CHECK-NEXT:    vst $vr0, $a1, 48
-; CHECK-NEXT:    vst $vr3, $a1, 32
-; CHECK-NEXT:    vst $vr1, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
-; CHECK-NEXT:    ret
-entry:
-  %A = load <8 x i16>, ptr %ptr
-  %B = sext <8 x i16> %A to <8 x i64>
-  store <8 x i64> %B, ptr %dst
-  ret void
-}
-
-define void @load_sext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_sext_4i32_to_4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vshuf4i.w $vr1, $vr0, 16
-; CHECK-NEXT:    vslli.d $vr1, $vr1, 32
-; CHECK-NEXT:    vsrai.d $vr1, $vr1, 32
-; CHECK-NEXT:    vshuf4i.w $vr0, $vr0, 50
-; CHECK-NEXT:    vslli.d $vr0, $vr0, 32
-; CHECK-NEXT:    vsrai.d $vr0, $vr0, 32
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr1, $a1, 0
+; CHECK-NEXT:    vst $vr4, $a1, 32
+; CHECK-NEXT:    vst $vr2, $a1, 16
+; CHECK-NEXT:    vst $vr3, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <4 x i32>, ptr %ptr
-  %B = sext <4 x i32> %A to <4 x i64>
-  store <4 x i64> %B, ptr %dst
+  %A = load <16 x i16>, ptr %ptr
+  %B = sext <16 x i16> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
   ret void
 }
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
index 602c0f1a5a910..2ace0bf34021c 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/vec-zext.ll
@@ -2,7 +2,6 @@
 ; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32
 ; RUN: llc --mtriple=loongarch64 --mattr=+lsx %s -o - | FileCheck %s --check-prefixes=CHECK,LA64
 
-
 define void @load_zext_2i8_to_2i64(ptr %ptr, ptr %dst) {
 ; CHECK-LABEL: load_zext_2i8_to_2i64:
 ; CHECK:       # %bb.0: # %entry
@@ -21,64 +20,83 @@ entry:
   ret void
 }
 
-define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_zext_4i8_to_4i32:
+define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_2i16_to_2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
 ; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <4 x i8>, ptr %ptr
-  %B = zext <4 x i8> %A to <4 x i32>
-  store <4 x i32> %B, ptr %dst
+  %A = load <2 x i16>, ptr %ptr
+  %B = zext <2 x i16> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
   ret void
 }
 
-define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_8i8_to_8i16:
+define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_2i32_to_2i64:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vrepli.b $vr0, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
-; LA32-NEXT:    vrepli.b $vr1, 0
-; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
-; LA64-LABEL: load_zext_8i8_to_8i16:
+; LA64-LABEL: load_zext_2i32_to_2i64:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT:    vilvl.w $vr0, $vr1, $vr0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
-  %A = load <8 x i8>, ptr %ptr
-  %B = zext <8 x i8> %A to <8 x i16>
-  store <8 x i16> %B, ptr %dst
+  %A = load <2 x i32>, ptr %ptr
+  %B = zext <2 x i32> %A to <2 x i64>
+  store <2 x i64> %B, ptr %dst
   ret void
 }
 
-define void @load_zext_2i16_to_2i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_zext_2i16_to_2i64:
+define void @load_zext_4i8_to_4i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    ld.w $a0, $a0, 0
 ; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
 ; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr0, $vr1, $vr0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <2 x i16>, ptr %ptr
-  %B = zext <2 x i16> %A to <2 x i64>
-  store <2 x i64> %B, ptr %dst
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i32>
+  store <4 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i8_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i8_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    ld.w $a0, $a0, 0
+; CHECK-NEXT:    vinsgr2vr.w $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr0
+; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i8>, ptr %ptr
+  %B = zext <4 x i8> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
   ret void
 }
 
@@ -109,29 +127,222 @@ entry:
   ret void
 }
 
-define void @load_zext_2i32_to_2i64(ptr %ptr, ptr %dst) {
-; LA32-LABEL: load_zext_2i32_to_2i64:
+define void @load_zext_4i16_to_4i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_4i16_to_4i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vrepli.b $vr1, 0
+; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    vilvl.h $vr2, $vr1, $vr2
+; LA32-NEXT:    vilvl.w $vr1, $vr1, $vr2
+; LA32-NEXT:    vst $vr1, $a1, 16
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_4i16_to_4i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vrepli.b $vr1, 0
+; LA64-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA64-NEXT:    vilvl.w $vr2, $vr1, $vr0
+; LA64-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a1, 16
+; LA64-NEXT:    vst $vr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <4 x i16>, ptr %ptr
+  %B = zext <4 x i16> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_4i32_to_4i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr0
+; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <4 x i32>, ptr %ptr
+  %B = zext <4 x i32> %A to <4 x i64>
+  store <4 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i16(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i16:
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    ld.w $a2, $a0, 0
 ; LA32-NEXT:    ld.w $a0, $a0, 4
-; LA32-NEXT:    vrepli.b $vr0, 0
 ; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
-; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 2
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vrepli.b $vr1, 0
+; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
 ; LA32-NEXT:    vst $vr0, $a1, 0
 ; LA32-NEXT:    ret
 ;
-; LA64-LABEL: load_zext_2i32_to_2i64:
+; LA64-LABEL: load_zext_8i8_to_8i16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    ld.d $a0, $a0, 0
 ; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
 ; LA64-NEXT:    vrepli.b $vr1, 0
-; LA64-NEXT:    vilvl.w $vr0, $vr1, $vr0
+; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
 ; LA64-NEXT:    vst $vr0, $a1, 0
 ; LA64-NEXT:    ret
 entry:
-  %A = load <2 x i32>, ptr %ptr
-  %B = zext <2 x i32> %A to <2 x i64>
-  store <2 x i64> %B, ptr %dst
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i16>
+  store <8 x i16> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i32(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i32:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vrepli.b $vr1, 0
+; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT:    vilvl.h $vr0, $vr1, $vr0
+; LA32-NEXT:    vinsgr2vr.w $vr2, $a0, 0
+; LA32-NEXT:    vilvl.b $vr2, $vr1, $vr2
+; LA32-NEXT:    vilvl.h $vr1, $vr1, $vr2
+; LA32-NEXT:    vst $vr1, $a1, 16
+; LA32-NEXT:    vst $vr0, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i32:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vrepli.b $vr1, 0
+; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT:    vilvl.h $vr2, $vr1, $vr0
+; LA64-NEXT:    vilvh.h $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a1, 16
+; LA64-NEXT:    vst $vr2, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i8_to_8i64(ptr %ptr, ptr %dst) {
+; LA32-LABEL: load_zext_8i8_to_8i64:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    ld.w $a2, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, 4
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a2, 0
+; LA32-NEXT:    vinsgr2vr.w $vr0, $a0, 1
+; LA32-NEXT:    vrepli.b $vr1, 0
+; LA32-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA32-NEXT:    vilvl.h $vr2, $vr1, $vr0
+; LA32-NEXT:    vilvl.w $vr3, $vr1, $vr2
+; LA32-NEXT:    vilvh.w $vr2, $vr1, $vr2
+; LA32-NEXT:    vilvh.h $vr0, $vr1, $vr0
+; LA32-NEXT:    vilvl.w $vr4, $vr1, $vr0
+; LA32-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; LA32-NEXT:    vst $vr0, $a1, 48
+; LA32-NEXT:    vst $vr4, $a1, 32
+; LA32-NEXT:    vst $vr2, $a1, 16
+; LA32-NEXT:    vst $vr3, $a1, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_zext_8i8_to_8i64:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    vinsgr2vr.d $vr0, $a0, 0
+; LA64-NEXT:    vrepli.b $vr1, 0
+; LA64-NEXT:    vilvl.b $vr0, $vr1, $vr0
+; LA64-NEXT:    vilvl.h $vr2, $vr1, $vr0
+; LA64-NEXT:    vilvl.w $vr3, $vr1, $vr2
+; LA64-NEXT:    vilvh.w $vr2, $vr1, $vr2
+; LA64-NEXT:    vilvh.h $vr0, $vr1, $vr0
+; LA64-NEXT:    vilvl.w $vr4, $vr1, $vr0
+; LA64-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; LA64-NEXT:    vst $vr0, $a1, 48
+; LA64-NEXT:    vst $vr4, $a1, 32
+; LA64-NEXT:    vst $vr2, $a1, 16
+; LA64-NEXT:    vst $vr3, $a1, 0
+; LA64-NEXT:    ret
+entry:
+  %A = load <8 x i8>, ptr %ptr
+  %B = zext <8 x i8> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_8i16_to_8i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.h $vr2, $vr1, $vr0
+; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = zext <8 x i16> %A to <8 x i32>
+  store <8 x i32> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_8i16_to_8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vilvl.h $vr2, $vr1, $vr0
+; CHECK-NEXT:    vilvl.w $vr3, $vr1, $vr2
+; CHECK-NEXT:    vilvh.w $vr2, $vr1, $vr2
+; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
+; CHECK-NEXT:    vilvl.w $vr4, $vr1, $vr0
+; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vst $vr0, $a1, 48
+; CHECK-NEXT:    vst $vr4, $a1, 32
+; CHECK-NEXT:    vst $vr2, $a1, 16
+; CHECK-NEXT:    vst $vr3, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i16>, ptr %ptr
+  %B = zext <8 x i16> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
+  ret void
+}
+
+define void @load_zext_8i32_to_8i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_8i32_to_8i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vld $vr0, $a0, 0
+; CHECK-NEXT:    vld $vr1, $a0, 16
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vilvl.w $vr3, $vr2, $vr0
+; CHECK-NEXT:    vilvh.w $vr0, $vr2, $vr0
+; CHECK-NEXT:    vilvl.w $vr4, $vr2, $vr1
+; CHECK-NEXT:    vilvh.w $vr1, $vr2, $vr1
+; CHECK-NEXT:    vst $vr1, $a1, 48
+; CHECK-NEXT:    vst $vr4, $a1, 32
+; CHECK-NEXT:    vst $vr0, $a1, 16
+; CHECK-NEXT:    vst $vr3, $a1, 0
+; CHECK-NEXT:    ret
+entry:
+  %A = load <8 x i32>, ptr %ptr
+  %B = zext <8 x i32> %A to <8 x i64>
+  store <8 x i64> %B, ptr %dst
   ret void
 }
 
@@ -210,59 +421,58 @@ entry:
   ret void
 }
 
-define void @load_zext_8i16_to_8i32(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_zext_8i16_to_8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
-; CHECK-NEXT:    ret
-entry:
-  %A = load <8 x i16>, ptr %ptr
-  %B = zext <8 x i16> %A to <8 x i32>
-  store <8 x i32> %B, ptr %dst
-  ret void
-}
-
-define void @load_zext_8i16_to_8i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_zext_8i16_to_8i64:
+define void @load_zext_16i16_to_16i32(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_16i16_to_16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.h $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr3, $vr1, $vr2
-; CHECK-NEXT:    vilvh.w $vr2, $vr1, $vr2
-; CHECK-NEXT:    vilvh.h $vr0, $vr1, $vr0
-; CHECK-NEXT:    vilvl.w $vr4, $vr1, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 48
+; CHECK-NEXT:    vld $vr1, $a0, 16
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vilvl.h $vr3, $vr2, $vr0
+; CHECK-NEXT:    vilvh.h $vr0, $vr2, $vr0
+; CHECK-NEXT:    vilvl.h $vr4, $vr2, $vr1
+; CHECK-NEXT:    vilvh.h $vr1, $vr2, $vr1
+; CHECK-NEXT:    vst $vr1, $a1, 48
 ; CHECK-NEXT:    vst $vr4, $a1, 32
-; CHECK-NEXT:    vst $vr2, $a1, 16
+; CHECK-NEXT:    vst $vr0, $a1, 16
 ; CHECK-NEXT:    vst $vr3, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <8 x i16>, ptr %ptr
-  %B = zext <8 x i16> %A to <8 x i64>
-  store <8 x i64> %B, ptr %dst
+  %A = load <16 x i16>, ptr %ptr
+  %B = zext <16 x i16> %A to <16 x i32>
+  store <16 x i32> %B, ptr %dst
   ret void
 }
 
-define void @load_zext_4i32_to_4i64(ptr %ptr, ptr %dst) {
-; CHECK-LABEL: load_zext_4i32_to_4i64:
+define void @load_zext_16i16_to_16i64(ptr %ptr, ptr %dst) {
+; CHECK-LABEL: load_zext_16i16_to_16i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, 0
-; CHECK-NEXT:    vilvl.w $vr2, $vr1, $vr0
-; CHECK-NEXT:    vilvh.w $vr0, $vr1, $vr0
-; CHECK-NEXT:    vst $vr0, $a1, 16
-; CHECK-NEXT:    vst $vr2, $a1, 0
+; CHECK-NEXT:    vld $vr1, $a0, 16
+; CHECK-NEXT:    vrepli.b $vr2, 0
+; CHECK-NEXT:    vilvl.h $vr3, $vr2, $vr0
+; CHECK-NEXT:    vilvl.w $vr4, $vr2, $vr3
+; CHECK-NEXT:    vilvh.w $vr3, $vr2, $vr3
+; CHECK-NEXT:    vilvh.h $vr0, $vr2, $vr0
+; CHECK-NEXT:    vilvl.w $vr5, $vr2, $vr0
+; CHECK-NEXT:    vilvh.w $vr0, $vr2, $vr0
+; CHECK-NEXT:    vilvl.h $vr6, $vr2, $vr1
+; CHECK-NEXT:    vilvl.w $vr7, $vr2, $vr6
+; CHECK-NEXT:    vilvh.w $vr6, $vr2, $vr6
+; CHECK-NEXT:    vilvh.h $vr1, $vr2, $vr1
+; CHECK-NEXT:    vilvl.w $vr8, $vr2, $vr1
+; CHECK-NEXT:    vilvh.w $vr1, $vr2, $vr1
+; CHECK-NEXT:    vst $vr1, $a1, 112
+; CHECK-NEXT:    vst $vr8, $a1, 96
+; CHECK-NEXT:    vst $vr6, $a1, 80
+; CHECK-NEXT:    vst $vr7, $a1, 64
+; CHECK-NEXT:    vst $vr0, $a1, 48
+; CHECK-NEXT:    vst $vr5, $a1, 32
+; CHECK-NEXT:    vst $vr3, $a1, 16
+; CHECK-NEXT:    vst $vr4, $a1, 0
 ; CHECK-NEXT:    ret
 entry:
-  %A = load <4 x i32>, ptr %ptr
-  %B = zext <4 x i32> %A to <4 x i64>
-  store <4 x i64> %B, ptr %dst
+  %A = load <16 x i16>, ptr %ptr
+  %B = zext <16 x i16> %A to <16 x i64>
+  store <16 x i64> %B, ptr %dst
   ret void
 }
