[llvm] [LoongArch] Precommit tests for 128-to-256-bit vector insertion and 256-to-128-bit subvector extraction (NFC) (PR #146299)
Lu Weining via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 30 00:47:50 PDT 2025
================
@@ -0,0 +1,1478 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+declare <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32>, <4 x i32>, i64)
+
+define <8 x i32> @insert_lo128_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: insert_lo128_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_1(<4 x i32> %a) {
+; CHECK-LABEL: insert_hi128_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %a, i64 4)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_lo128_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_lo128_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_2(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_hi128_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> poison, <4 x i32> %b, i64 4)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_lo128_v8i32_3(<8 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_lo128_v8i32_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 0)
+ ret <8 x i32> %1
+}
+
+define <8 x i32> @insert_hi128_v8i32_3(<8 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: insert_hi128_v8i32_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x i32> @llvm.experimental.vector.insert.v8i32.v4i32(<8 x i32> %a, <4 x i32> %b, i64 4)
+ ret <8 x i32> %1
+}
+
+declare <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float>, <4 x float>, i64)
+
+define <8 x float> @insert_lo128_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: insert_lo128_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 0)
+ ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_1(<4 x float> %a) {
+; CHECK-LABEL: insert_hi128_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %a, i64 4)
+ ret <8 x float> %1
+}
+
+define <8 x float> @insert_lo128_v8f32_2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_lo128_v8f32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 0)
+ ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_2(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_hi128_v8f32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> %b, i64 4)
+ ret <8 x float> %1
+}
+
+define <8 x float> @insert_lo128_v8f32_3(<8 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_lo128_v8f32_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 0)
+ ret <8 x float> %1
+}
+
+define <8 x float> @insert_hi128_v8f32_3(<8 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: insert_hi128_v8f32_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <8 x float> @llvm.experimental.vector.insert.v8f32.v4f32(<8 x float> %a, <4 x float> %b, i64 4)
+ ret <8 x float> %1
+}
+
+declare <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64>, <2 x i64>, i64)
+
+define <4 x i64> @insert_lo128_v4i64_1(<2 x i64> %a) {
+; CHECK-LABEL: insert_lo128_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_hi128_v4i64_1(<2 x i64> %a) {
+; CHECK-LABEL: insert_hi128_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %a, i64 2)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_lo128_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_lo128_v4i64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_hi128_v4i64_2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_hi128_v4i64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> poison, <2 x i64> %b, i64 2)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_lo128_v4i64_3(<4 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_lo128_v4i64_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 0)
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @insert_hi128_v4i64_3(<4 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: insert_hi128_v4i64_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x i64> @llvm.experimental.vector.insert.v4i64.v2i64(<4 x i64> %a, <2 x i64> %b, i64 2)
+ ret <4 x i64> %1
+}
+
+declare <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double>, <2 x double>, i64)
+
+define <4 x double> @insert_lo128_v4f64_1(<2 x double> %a) {
+; CHECK-LABEL: insert_lo128_v4f64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 0)
+ ret <4 x double> %1
+}
+
+define <4 x double> @insert_hi128_v4f64_1(<2 x double> %a) {
+; CHECK-LABEL: insert_hi128_v4f64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %a, i64 2)
+ ret <4 x double> %1
+}
+
+define <4 x double> @insert_lo128_v4f64_2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_lo128_v4f64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 0)
+ ret <4 x double> %1
+}
+
+define <4 x double> @insert_hi128_v4f64_2(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_hi128_v4f64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> poison, <2 x double> %b, i64 2)
+ ret <4 x double> %1
+}
+
+define <4 x double> @insert_lo128_v4f64_3(<4 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_lo128_v4f64_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 0)
+ ret <4 x double> %1
+}
+
+define <4 x double> @insert_hi128_v4f64_3(<4 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: insert_hi128_v4f64_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <4 x double> @llvm.experimental.vector.insert.v4f64.v2f64(<4 x double> %a, <2 x double> %b, i64 2)
+ ret <4 x double> %1
+}
+
+declare <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16>, <8 x i16>, i64)
+
+define <16 x i16> @insert_lo128_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: insert_lo128_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_hi128_v16i16_1(<8 x i16> %a) {
+; CHECK-LABEL: insert_hi128_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %a, i64 8)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_lo128_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_lo128_v16i16_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_hi128_v16i16_2(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_hi128_v16i16_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> poison, <8 x i16> %b, i64 8)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_lo128_v16i16_3(<16 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_lo128_v16i16_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 0)
+ ret <16 x i16> %1
+}
+
+define <16 x i16> @insert_hi128_v16i16_3(<16 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: insert_hi128_v16i16_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <16 x i16> @llvm.experimental.vector.insert.v16i16.v8i16(<16 x i16> %a, <8 x i16> %b, i64 8)
+ ret <16 x i16> %1
+}
+
+declare <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8>, <16 x i8>, i64)
+
+define <32 x i8> @insert_lo128_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: insert_lo128_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 0)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_hi128_v32i8_1(<16 x i8> %a) {
+; CHECK-LABEL: insert_hi128_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr0, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %a, i64 16)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_lo128_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_lo128_v32i8_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 0)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_hi128_v32i8_2(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_hi128_v32i8_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> poison, <16 x i8> %b, i64 16)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_lo128_v32i8_3(<32 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_lo128_v32i8_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 0
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 0)
+ ret <32 x i8> %1
+}
+
+define <32 x i8> @insert_hi128_v32i8_3(<32 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: insert_hi128_v32i8_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vst $vr1, $sp, 16
+; CHECK-NEXT: xvld $xr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = call <32 x i8> @llvm.experimental.vector.insert.v32i8.v16i8(<32 x i8> %a, <16 x i8> %b, i64 16)
+ ret <32 x i8> %1
+}
+
+define <4 x i32> @extract_lo128_v8i32_1(<8 x i32> %a) {
+; CHECK-LABEL: extract_lo128_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @extract_hi128_v8i32_1(<8 x i32> %a) {
+; CHECK-LABEL: extract_hi128_v8i32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i32> %a, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @extract_lo128_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: extract_lo128_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %1
+}
+
+define <4 x i32> @extract_hi128_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: extract_hi128_v8i32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x i32> %b, <8 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x i32> %1
+}
+
+define <4 x float> @extract_lo128_v8f32_1(<8 x float> %a) {
+; CHECK-LABEL: extract_lo128_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %1
+}
+
+define <4 x float> @extract_hi128_v8f32_1(<8 x float> %a) {
+; CHECK-LABEL: extract_hi128_v8f32_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x float> %1
+}
+
+define <4 x float> @extract_lo128_v8f32_2(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: extract_lo128_v8f32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %1
+}
+
+define <4 x float> @extract_hi128_v8f32_2(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: extract_hi128_v8f32_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ ret <4 x float> %1
+}
+
+define <2 x i64> @extract_lo128_v4i64_1(<4 x i64> %a) {
+; CHECK-LABEL: extract_lo128_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @extract_hi128_v4i64_1(<4 x i64> %a) {
+; CHECK-LABEL: extract_hi128_v4i64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i64> %a, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @extract_lo128_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: extract_lo128_v4i64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @extract_hi128_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
+; CHECK-LABEL: extract_hi128_v4i64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x i64> %b, <4 x i64> poison, <2 x i32> <i32 2, i32 3>
+ ret <2 x i64> %1
+}
+
+define <2 x double> @extract_lo128_v4f64_1(<4 x double> %a) {
+; CHECK-LABEL: extract_lo128_v4f64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %1
+}
+
+define <2 x double> @extract_hi128_v4f64_1(<4 x double> %a) {
+; CHECK-LABEL: extract_hi128_v4f64_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 2, i32 3>
+ ret <2 x double> %1
+}
+
+define <2 x double> @extract_lo128_v4f64_2(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: extract_lo128_v4f64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 1>
+ ret <2 x double> %1
+}
+
+define <2 x double> @extract_hi128_v4f64_2(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: extract_hi128_v4f64_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 2, i32 3>
+ ret <2 x double> %1
+}
+
+define <8 x i16> @extract_lo128_v16i16_1(<16 x i16> %a) {
+; CHECK-LABEL: extract_lo128_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i16> %a, <16 x i16> poison,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @extract_hi128_v16i16_1(<16 x i16> %a) {
+; CHECK-LABEL: extract_hi128_v16i16_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i16> %a, <16 x i16> poison,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @extract_lo128_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: extract_lo128_v16i16_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i16> %b, <16 x i16> poison,
+ <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %1
+}
+
+define <8 x i16> @extract_hi128_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
+; CHECK-LABEL: extract_hi128_v16i16_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <16 x i16> %b, <16 x i16> poison,
+ <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i16> %1
+}
+
+define <16 x i8> @extract_lo128_v32i8_1(<32 x i8> %a) {
+; CHECK-LABEL: extract_lo128_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <32 x i8> %a, <32 x i8> poison,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @extract_hi128_v32i8_1(<32 x i8> %a) {
+; CHECK-LABEL: extract_hi128_v32i8_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr0, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 16
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <32 x i8> %a, <32 x i8> poison,
+ <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23,
+ i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @extract_lo128_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
+; CHECK-LABEL: extract_lo128_v32i8_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi.d $sp, $sp, -64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT: .cfi_offset 1, -8
+; CHECK-NEXT: .cfi_offset 22, -16
+; CHECK-NEXT: addi.d $fp, $sp, 64
+; CHECK-NEXT: .cfi_def_cfa 22, 0
+; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT: xvst $xr1, $sp, 0
+; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: addi.d $sp, $fp, -64
+; CHECK-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT: addi.d $sp, $sp, 64
+; CHECK-NEXT: ret
+entry:
+ %1 = shufflevector <32 x i8> %b, <32 x i8> poison,
+ <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
+ i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @extract_hi128_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
----------------
SixWeining wrote:
What is the purpose of variable `a`?
https://github.com/llvm/llvm-project/pull/146299
More information about the llvm-commits
mailing list