[llvm] [CodeGen] Lower vector interleaves of const splats to a wider splat (PR #151110)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 29 07:10:35 PDT 2025
https://github.com/david-arm updated https://github.com/llvm/llvm-project/pull/151110
>From 5d1f01f15c27402cf9c96a783ce98d05441b2e90 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 29 Jul 2025 14:07:11 +0000
Subject: [PATCH 1/2] Add tests
---
.../AArch64/fixed-vector-interleave.ll | 89 +++++-
.../CodeGen/AArch64/sve-vector-interleave.ll | 170 +++++++++-
.../RISCV/rvv/vector-interleave-fixed.ll | 226 +++++++++++++
.../CodeGen/RISCV/rvv/vector-interleave.ll | 300 ++++++++++++++++++
4 files changed, 783 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index a9618fdc2dec3..54f4543c81d4c 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -131,6 +131,92 @@ define <4 x i64> @interleave2_v4i64(<2 x i64> %vec0, <2 x i64> %vec1) {
ret <4 x i64> %retval
}
+define <4 x i16> @interleave2_same_const_splat_v4i16() {
+; CHECK-SD-LABEL: interleave2_same_const_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.4h, #3
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_same_const_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #3 // =0x3
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: ret
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
+ ret <4 x i16> %retval
+}
+
+define <4 x i16> @interleave2_diff_const_splat_v4i16() {
+; CHECK-SD-LABEL: interleave2_diff_const_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI11_0
+; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI11_0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_diff_const_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #3 // =0x3
+; CHECK-GI-NEXT: mov w9, #4 // =0x4
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: mov v0.h[1], w8
+; CHECK-GI-NEXT: mov v1.h[1], w9
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 4))
+ ret <4 x i16> %retval
+}
+
+define <4 x i16> @interleave2_same_nonconst_splat_v4i16(i16 %a) {
+; CHECK-SD-LABEL: interleave2_same_nonconst_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v0.4h, w0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_same_nonconst_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v0.4h, w0
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v0.4h
+; CHECK-GI-NEXT: ret
+ %ins = insertelement <2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> <i32 0, i32 0>
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> %splat, <2 x i16> %splat)
+ ret <4 x i16> %retval
+}
+
+define <4 x i16> @interleave2_diff_nonconst_splat_v4i16(i16 %a, i16 %b) {
+; CHECK-SD-LABEL: interleave2_diff_nonconst_splat_v4i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w0
+; CHECK-SD-NEXT: mov v0.h[2], w1
+; CHECK-SD-NEXT: mov v0.h[3], w1
+; CHECK-SD-NEXT: rev32 v1.4h, v0.4h
+; CHECK-SD-NEXT: uzp1 v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: interleave2_diff_nonconst_splat_v4i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v0.4h, w0
+; CHECK-GI-NEXT: dup v1.4h, w1
+; CHECK-GI-NEXT: zip1 v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
+ %ins1 = insertelement <2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <2 x i16> %ins1, <2 x i16> poison, <2 x i32> <i32 0, i32 0>
+ %ins2 = insertelement <2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <2 x i16> %ins2, <2 x i16> poison, <2 x i32> <i32 0, i32 0>
+ %retval = call <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16> %splat1, <2 x i16> %splat2)
+ ret <4 x i16> %retval
+}
+
+; FIXME: This test crashes during lowering
+;define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) {
+; %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
+; ret <8 x i16> %retval
+;}
+
; Float declarations
declare <4 x half> @llvm.vector.interleave2.v4f16(<2 x half>, <2 x half>)
@@ -145,4 +231,5 @@ declare <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8>, <16 x i8>)
declare <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16>, <8 x i16>)
declare <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32>, <4 x i32>)
declare <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64>, <2 x i64>)
-
+declare <4 x i16> @llvm.vector.interleave2.v4i16(<2 x i16>, <2 x i16>)
+declare <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
index 52cb2d9ebe343..b954863560899 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -267,7 +267,7 @@ define <vscale x 32 x i16> @interleave4_nxv8i16(<vscale x 8 x i16> %vec0, <vscal
; SME2-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3
; SME2-NEXT: zip { z0.h - z3.h }, { z0.h - z3.h }
; SME2-NEXT: ret
- %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
+ %retval = call <vscale x 32 x i16> @llvm.vector.interleave4.nxv32i16(<vscale x 8 x i16> %vec0, <vscale x 8 x i16> %vec1, <vscale x 8 x i16> %vec2, <vscale x 8 x i16> %vec3)
ret <vscale x 32 x i16> %retval
}
@@ -540,6 +540,172 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale
ret <vscale x 4 x i32> %retval
}
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+; SVE-LABEL: interleave2_same_const_splat_nxv4i16:
+; SVE: // %bb.0:
+; SVE-NEXT: mov z0.d, #3 // =0x3
+; SVE-NEXT: zip2 z1.d, z0.d, z0.d
+; SVE-NEXT: zip1 z0.d, z0.d, z0.d
+; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: interleave2_same_const_splat_nxv4i16:
+; SME2: // %bb.0:
+; SME2-NEXT: mov z0.d, #3 // =0x3
+; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
+; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+; SME2-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+}
+
+; Interleave of two different constant splats (3 and 4).
+define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
+; SVE-LABEL: interleave2_diff_const_splat_nxv4i16:
+; SVE: // %bb.0:
+; SVE-NEXT: mov z0.d, #4 // =0x4
+; SVE-NEXT: mov z1.d, #3 // =0x3
+; SVE-NEXT: zip2 z2.d, z1.d, z0.d
+; SVE-NEXT: zip1 z0.d, z1.d, z0.d
+; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: interleave2_diff_const_splat_nxv4i16:
+; SME2: // %bb.0:
+; SME2-NEXT: mov z0.d, #4 // =0x4
+; SME2-NEXT: mov z1.d, #3 // =0x3
+; SME2-NEXT: zip { z0.d, z1.d }, z1.d, z0.d
+; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+; SME2-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4))
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
+; SVE-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
+; SVE-NEXT: mov z0.d, x0
+; SVE-NEXT: zip2 z1.d, z0.d, z0.d
+; SVE-NEXT: zip1 z0.d, z0.d, z0.d
+; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; SME2: // %bb.0:
+; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
+; SME2-NEXT: mov z0.d, x0
+; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
+; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+; SME2-NEXT: ret
+ %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) {
+; SVE-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; SVE: // %bb.0:
+; SVE-NEXT: // kill: def $w1 killed $w1 def $x1
+; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
+; SVE-NEXT: mov z0.d, x0
+; SVE-NEXT: mov z1.d, x1
+; SVE-NEXT: zip2 z2.d, z0.d, z1.d
+; SVE-NEXT: zip1 z0.d, z0.d, z1.d
+; SVE-NEXT: uzp1 z0.s, z0.s, z2.s
+; SVE-NEXT: ret
+;
+; SME2-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; SME2: // %bb.0:
+; SME2-NEXT: // kill: def $w1 killed $w1 def $x1
+; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
+; SME2-NEXT: mov z0.d, x0
+; SME2-NEXT: mov z1.d, x1
+; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z1.d
+; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
+; SME2-NEXT: ret
+ %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2)
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
+; SVE-LABEL: interleave4_same_const_splat_nxv8i16:
+; SVE: // %bb.0:
+; SVE-NEXT: mov z0.d, #3 // =0x3
+; SVE-NEXT: zip1 z1.d, z0.d, z0.d
+; SVE-NEXT: zip1 z2.d, z1.d, z1.d
+; SVE-NEXT: zip2 z1.d, z1.d, z1.d
+; SVE-NEXT: uzp1 z2.s, z2.s, z0.s
+; SVE-NEXT: uzp1 z2.h, z2.h, z0.h
+; SVE-NEXT: uunpklo z2.s, z2.h
+; SVE-NEXT: uunpklo z2.d, z2.s
+; SVE-NEXT: uzp1 z1.s, z2.s, z1.s
+; SVE-NEXT: uzp1 z2.h, z1.h, z0.h
+; SVE-NEXT: zip2 z0.d, z0.d, z0.d
+; SVE-NEXT: uunpkhi z2.s, z2.h
+; SVE-NEXT: zip1 z3.d, z0.d, z0.d
+; SVE-NEXT: zip2 z0.d, z0.d, z0.d
+; SVE-NEXT: uunpkhi z2.d, z2.s
+; SVE-NEXT: uzp1 z2.s, z3.s, z2.s
+; SVE-NEXT: uzp1 z2.h, z1.h, z2.h
+; SVE-NEXT: uunpkhi z2.s, z2.h
+; SVE-NEXT: uunpklo z2.d, z2.s
+; SVE-NEXT: uzp1 z0.s, z2.s, z0.s
+; SVE-NEXT: uzp1 z0.h, z1.h, z0.h
+; SVE-NEXT: ret
+;
+; SME-ALL-LABEL: interleave4_same_const_splat_nxv8i16:
+; SME-ALL: // %bb.0:
+; SME-ALL-NEXT: mov z0.d, #3 // =0x3
+; SME-ALL-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
+; SME-ALL-NEXT: zip { z2.d, z3.d }, z0.d, z0.d
+; SME-ALL-NEXT: uzp1 z4.s, z2.s, z0.s
+; SME-ALL-NEXT: uzp1 z4.h, z4.h, z0.h
+; SME-ALL-NEXT: uunpklo z4.s, z4.h
+; SME-ALL-NEXT: uunpklo z4.d, z4.s
+; SME-ALL-NEXT: uzp1 z2.s, z4.s, z3.s
+; SME-ALL-NEXT: uzp1 z3.h, z2.h, z0.h
+; SME-ALL-NEXT: zip { z0.d, z1.d }, z1.d, z1.d
+; SME-ALL-NEXT: uunpkhi z3.s, z3.h
+; SME-ALL-NEXT: uunpkhi z3.d, z3.s
+; SME-ALL-NEXT: uzp1 z3.s, z0.s, z3.s
+; SME-ALL-NEXT: uzp1 z3.h, z2.h, z3.h
+; SME-ALL-NEXT: uunpkhi z3.s, z3.h
+; SME-ALL-NEXT: uunpklo z3.d, z3.s
+; SME-ALL-NEXT: uzp1 z0.s, z3.s, z1.s
+; SME-ALL-NEXT: uzp1 z0.h, z2.h, z0.h
+; SME-ALL-NEXT: ret
+;
+; SME2-256-LABEL: interleave4_same_const_splat_nxv8i16:
+; SME2-256: // %bb.0:
+; SME2-256-NEXT: mov z0.d, #3 // =0x3
+; SME2-256-NEXT: mov z1.d, z0.d
+; SME2-256-NEXT: mov z2.d, z0.d
+; SME2-256-NEXT: mov z3.d, z0.d
+; SME2-256-NEXT: zip { z0.d - z3.d }, { z0.d - z3.d }
+; SME2-256-NEXT: uzp1 z4.s, z0.s, z0.s
+; SME2-256-NEXT: uzp1 z4.h, z4.h, z0.h
+; SME2-256-NEXT: uunpklo z4.s, z4.h
+; SME2-256-NEXT: uunpklo z4.d, z4.s
+; SME2-256-NEXT: uzp1 z4.s, z4.s, z1.s
+; SME2-256-NEXT: uzp1 z5.h, z4.h, z0.h
+; SME2-256-NEXT: uunpkhi z5.s, z5.h
+; SME2-256-NEXT: uunpkhi z5.d, z5.s
+; SME2-256-NEXT: uzp1 z5.s, z2.s, z5.s
+; SME2-256-NEXT: uzp1 z5.h, z4.h, z5.h
+; SME2-256-NEXT: uunpkhi z5.s, z5.h
+; SME2-256-NEXT: uunpklo z5.d, z5.s
+; SME2-256-NEXT: uzp1 z0.s, z5.s, z3.s
+; SME2-256-NEXT: uzp1 z0.h, z4.h, z0.h
+; SME2-256-NEXT: ret
+ %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 8 x i16> %retval
+}
+
; Float declarations
declare <vscale x 4 x half> @llvm.vector.interleave2.nxv4f16(<vscale x 2 x half>, <vscale x 2 x half>)
declare <vscale x 8 x half> @llvm.vector.interleave2.nxv8f16(<vscale x 4 x half>, <vscale x 4 x half>)
@@ -567,3 +733,5 @@ declare <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64>,
declare <vscale x 16 x i8> @llvm.vector.interleave2.nxv16i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 8 x i16> @llvm.vector.interleave2.nxv8i16(<vscale x 4 x i16>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.vector.interleave2.nxv4i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
+declare <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16>, <vscale x 2 x i16>)
+declare <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>, <vscale x 2 x i16>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 3dc83d50ee3f3..f3ba7fe33fa48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -1636,3 +1636,229 @@ define <8 x half> @vector_interleave8_v8f16_v1f16(<1 x half> %a, <1 x half> %b,
%res = call <8 x half> @llvm.vector.interleave8.v8f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g, <1 x half> %h)
ret <8 x half> %res
}
+
+define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) {
+; CHECK-LABEL: interleave4_const_splat_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: add a2, a0, a1
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg4e16.v v8, (a0)
+; CHECK-NEXT: add a3, a2, a1
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: vle16.v v9, (a3)
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vle16.v v11, (a1)
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v11, 2
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave4_const_splat_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: addi sp, sp, -16
+; ZVBB-NEXT: .cfi_def_cfa_offset 16
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: sub sp, sp, a0
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 3
+; ZVBB-NEXT: addi a0, sp, 16
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: vmv1r.v v9, v8
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vmv1r.v v10, v8
+; ZVBB-NEXT: add a2, a0, a1
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
+; ZVBB-NEXT: vsseg4e16.v v8, (a0)
+; ZVBB-NEXT: add a3, a2, a1
+; ZVBB-NEXT: add a1, a3, a1
+; ZVBB-NEXT: vle16.v v9, (a3)
+; ZVBB-NEXT: vle16.v v10, (a2)
+; ZVBB-NEXT: vle16.v v11, (a1)
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vi v9, v11, 2
+; ZVBB-NEXT: vslideup.vi v8, v10, 2
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vslideup.vi v8, v9, 4
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: add sp, sp, a0
+; ZVBB-NEXT: .cfi_def_cfa sp, 16
+; ZVBB-NEXT: addi sp, sp, 16
+; ZVBB-NEXT: .cfi_def_cfa_offset 0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave4_const_splat_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vmv.v.i v8, 3
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: vmv1r.v v9, v8
+; ZIP-NEXT: srli a1, a1, 2
+; ZIP-NEXT: vmv1r.v v10, v8
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: vmv1r.v v11, v8
+; ZIP-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
+; ZIP-NEXT: vsseg4e16.v v8, (a0)
+; ZIP-NEXT: add a3, a2, a1
+; ZIP-NEXT: add a1, a3, a1
+; ZIP-NEXT: vle16.v v9, (a3)
+; ZIP-NEXT: vle16.v v10, (a2)
+; ZIP-NEXT: vle16.v v11, (a1)
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v11, 2
+; ZIP-NEXT: vslideup.vi v8, v10, 2
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 4
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
+ %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
+ ret <8 x i16> %retval
+}
+
+define <8 x i16> @interleave4_same_nonconst_splat_v8i16(i16 %a) {
+; CHECK-LABEL: interleave4_same_nonconst_splat_v8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: add a2, a0, a1
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vsseg4e16.v v8, (a0)
+; CHECK-NEXT: add a3, a2, a1
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: vle16.v v9, (a3)
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vle16.v v11, (a1)
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v11, 2
+; CHECK-NEXT: vslideup.vi v8, v10, 2
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave4_same_nonconst_splat_v8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: addi sp, sp, -16
+; ZVBB-NEXT: .cfi_def_cfa_offset 16
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: sub sp, sp, a1
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVBB-NEXT: vmv.v.x v8, a0
+; ZVBB-NEXT: addi a0, sp, 16
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: vmv1r.v v9, v8
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vmv1r.v v10, v8
+; ZVBB-NEXT: add a2, a0, a1
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
+; ZVBB-NEXT: vsseg4e16.v v8, (a0)
+; ZVBB-NEXT: add a3, a2, a1
+; ZVBB-NEXT: add a1, a3, a1
+; ZVBB-NEXT: vle16.v v9, (a3)
+; ZVBB-NEXT: vle16.v v10, (a2)
+; ZVBB-NEXT: vle16.v v11, (a1)
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVBB-NEXT: vslideup.vi v9, v11, 2
+; ZVBB-NEXT: vslideup.vi v8, v10, 2
+; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVBB-NEXT: vslideup.vi v8, v9, 4
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: add sp, sp, a0
+; ZVBB-NEXT: .cfi_def_cfa sp, 16
+; ZVBB-NEXT: addi sp, sp, 16
+; ZVBB-NEXT: .cfi_def_cfa_offset 0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave4_same_nonconst_splat_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: sub sp, sp, a1
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vmv.v.x v8, a0
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: vmv1r.v v9, v8
+; ZIP-NEXT: srli a1, a1, 2
+; ZIP-NEXT: vmv1r.v v10, v8
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: vmv1r.v v11, v8
+; ZIP-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
+; ZIP-NEXT: vsseg4e16.v v8, (a0)
+; ZIP-NEXT: add a3, a2, a1
+; ZIP-NEXT: add a1, a3, a1
+; ZIP-NEXT: vle16.v v9, (a3)
+; ZIP-NEXT: vle16.v v10, (a2)
+; ZIP-NEXT: vle16.v v11, (a1)
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v11, 2
+; ZIP-NEXT: vslideup.vi v8, v10, 2
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 4
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
+ %ins = insertelement <2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer
+ %retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat, <2 x i16> %splat)
+ ret <8 x i16> %retval
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 01cc5c58b24ce..7a977ff9b4e3a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -14947,3 +14947,303 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv2f64(<vscale x 2 x
%res = call <vscale x 16 x double> @llvm.vector.interleave8.nxv16f64(<vscale x 2 x double> %v0, <vscale x 2 x double> %v1, <vscale x 2 x double> %v2, <vscale x 2 x double> %v3, <vscale x 2 x double> %v4, <vscale x 2 x double> %v5, <vscale x 2 x double> %v6, <vscale x 2 x double> %v7)
ret <vscale x 16 x double> %res
}
+
+define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
+; V-LABEL: interleave2_same_const_splat_nxv4i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; V-NEXT: vmv.v.i v9, 3
+; V-NEXT: li a0, 3
+; V-NEXT: vmv.v.i v10, -1
+; V-NEXT: vwaddu.vx v8, v9, a0
+; V-NEXT: vwmaccu.vx v8, a0, v10
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v9, v8, a0
+; V-NEXT: vslideup.vx v8, v9, a0
+; V-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_same_const_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 3
+; ZVBB-NEXT: li a0, 3
+; ZVBB-NEXT: vwsll.vi v9, v8, 16
+; ZVBB-NEXT: vwaddu.wx v8, v9, a0
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v9, v8, a0
+; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave2_same_const_splat_nxv4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZIP-NEXT: vmv.v.i v9, 3
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: ri.vzip2b.vv v10, v9, v9
+; ZIP-NEXT: ri.vzip2a.vv v8, v9, v9
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v8, v10, a0
+; ZIP-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 4 x i16> %retval
+}
+
+; Interleave of two different constant splats (3 and 4).
+define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
+; V-LABEL: interleave2_diff_const_splat_nxv4i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; V-NEXT: vmv.v.i v9, 3
+; V-NEXT: li a0, 4
+; V-NEXT: vmv.v.i v10, -1
+; V-NEXT: vwaddu.vx v8, v9, a0
+; V-NEXT: vwmaccu.vx v8, a0, v10
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v9, v8, a0
+; V-NEXT: vslideup.vx v8, v9, a0
+; V-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_diff_const_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 4
+; ZVBB-NEXT: li a0, 3
+; ZVBB-NEXT: vwsll.vi v9, v8, 16
+; ZVBB-NEXT: vwaddu.wx v8, v9, a0
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v9, v8, a0
+; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave2_diff_const_splat_nxv4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZIP-NEXT: vmv.v.i v9, 4
+; ZIP-NEXT: vmv.v.i v10, 3
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: ri.vzip2b.vv v11, v10, v9
+; ZIP-NEXT: ri.vzip2a.vv v8, v10, v9
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v8, v11, a0
+; ZIP-NEXT: ret
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 4))
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
+; V-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; V-NEXT: vmv.v.x v9, a0
+; V-NEXT: vmv.v.i v10, -1
+; V-NEXT: vwaddu.vx v8, v9, a0
+; V-NEXT: vwmaccu.vx v8, a0, v10
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v9, v8, a0
+; V-NEXT: vslideup.vx v8, v9, a0
+; V-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.x v8, a0
+; ZVBB-NEXT: vwsll.vi v9, v8, 16
+; ZVBB-NEXT: vwaddu.wx v8, v9, a0
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: srli a0, a0, 2
+; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v9, v8, a0
+; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZIP-NEXT: vmv.v.x v9, a0
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: ri.vzip2b.vv v10, v9, v9
+; ZIP-NEXT: ri.vzip2a.vv v8, v9, v9
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v8, v10, a0
+; ZIP-NEXT: ret
+ %ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
+ ret <vscale x 4 x i16> %retval
+}
+
+; Interleave of two splats built from different scalar arguments (%a and %b).
+define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %b) {
+; V-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; V: # %bb.0:
+; V-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; V-NEXT: vmv.v.x v9, a0
+; V-NEXT: vmv.v.i v10, -1
+; V-NEXT: csrr a0, vlenb
+; V-NEXT: vwaddu.vx v8, v9, a1
+; V-NEXT: vwmaccu.vx v8, a1, v10
+; V-NEXT: srli a0, a0, 2
+; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; V-NEXT: vslidedown.vx v9, v8, a0
+; V-NEXT: vslideup.vx v8, v9, a0
+; V-NEXT: ret
+;
+; ZVBB-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.x v8, a1
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: vwsll.vi v9, v8, 16
+; ZVBB-NEXT: vwaddu.wx v8, v9, a0
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslidedown.vx v9, v8, a1
+; ZVBB-NEXT: vslideup.vx v8, v9, a1
+; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: interleave2_diff_nonconst_splat_nxv4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZIP-NEXT: vmv.v.x v9, a0
+; ZIP-NEXT: vmv.v.x v10, a1
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: ri.vzip2b.vv v11, v9, v10
+; ZIP-NEXT: ri.vzip2a.vv v8, v9, v10
+; ZIP-NEXT: srli a0, a0, 2
+; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v8, v11, a0
+; ZIP-NEXT: ret
+ %ins1 = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+ %splat1 = shufflevector <vscale x 2 x i16> %ins1, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %ins2 = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
+ %splat2 = shufflevector <vscale x 2 x i16> %ins2, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+ %retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat1, <vscale x 2 x i16> %splat2)
+ ret <vscale x 4 x i16> %retval
+}
+
+define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
+; CHECK-LABEL: interleave4_same_const_splat_nxv8i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: srli a2, a1, 1
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: add a3, a0, a2
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: vsseg4e16.v v8, (a0)
+; CHECK-NEXT: add a4, a3, a2
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: vle16.v v9, (a4)
+; CHECK-NEXT: vle16.v v8, (a2)
+; CHECK-NEXT: srli a1, a1, 2
+; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v9, v8, a1
+; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v10, (a3)
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vx v8, v10, a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: .cfi_def_cfa sp, 16
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: ret
+;
+; ZVBB-LABEL: interleave4_same_const_splat_nxv8i16:
+; ZVBB: # %bb.0:
+; ZVBB-NEXT: addi sp, sp, -16
+; ZVBB-NEXT: .cfi_def_cfa_offset 16
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: slli a0, a0, 1
+; ZVBB-NEXT: sub sp, sp, a0
+; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vmv.v.i v8, 3
+; ZVBB-NEXT: addi a0, sp, 16
+; ZVBB-NEXT: csrr a1, vlenb
+; ZVBB-NEXT: vmv1r.v v9, v8
+; ZVBB-NEXT: srli a2, a1, 1
+; ZVBB-NEXT: vmv1r.v v10, v8
+; ZVBB-NEXT: add a3, a0, a2
+; ZVBB-NEXT: vmv1r.v v11, v8
+; ZVBB-NEXT: vsseg4e16.v v8, (a0)
+; ZVBB-NEXT: add a4, a3, a2
+; ZVBB-NEXT: add a2, a4, a2
+; ZVBB-NEXT: vle16.v v9, (a4)
+; ZVBB-NEXT: vle16.v v8, (a2)
+; ZVBB-NEXT: srli a1, a1, 2
+; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v9, v8, a1
+; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vle16.v v10, (a3)
+; ZVBB-NEXT: vle16.v v8, (a0)
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; ZVBB-NEXT: vslideup.vx v8, v10, a1
+; ZVBB-NEXT: csrr a0, vlenb
+; ZVBB-NEXT: slli a0, a0, 1
+; ZVBB-NEXT: add sp, sp, a0
+; ZVBB-NEXT: .cfi_def_cfa sp, 16
+; ZVBB-NEXT: addi sp, sp, 16
+; ZVBB-NEXT: .cfi_def_cfa_offset 0
+; ZVBB-NEXT: ret
+ %retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
+ ret <vscale x 8 x i16> %retval
+}
>From 82e5c49d383e1018dece10d2210b73cf13a40c94 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood at arm.com>
Date: Tue, 29 Jul 2025 14:07:27 +0000
Subject: [PATCH 2/2] [DAGCombiner] Add combine for vector interleave of splats
This patch adds a DAG combine that looks for
concat_vectors(vector_interleave(splat, splat, ...)),
where all the splats are identical, and replaces it with a
single splat of the wider, concatenated vector type.
For fixed-width vectors the DAG combine only occurs for
interleave factors of 3 or more. However, it's not currently
safe to test this for AArch64, since there isn't any lowering
support for fixed-width interleaves with those factors, so
I've only added fixed-width tests for RISCV.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 35 ++++
.../CodeGen/AArch64/sve-vector-interleave.ll | 111 ++--------
.../RISCV/rvv/vector-interleave-fixed.ll | 192 +-----------------
.../CodeGen/RISCV/rvv/vector-interleave.ll | 148 ++------------
4 files changed, 67 insertions(+), 419 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 251682a5abbb0..1103a8518f3f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25173,6 +25173,38 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(CastOpcode, DL, VT, NewConcat);
}
+static SDValue combineConcatVectorInterleave(SDNode *N, SelectionDAG &DAG) { // Fold concat(interleave(splat, ...)) into one wider splat.
+ SDValue FirstOp = N->getOperand(0);
+ if (FirstOp.getOpcode() != ISD::VECTOR_INTERLEAVE ||
+ FirstOp.getNumOperands() != N->getNumOperands())
+ return SDValue(); // Operand 0 is not an interleave, or operand counts differ.
+
+ for (unsigned I = 0; I < N->getNumOperands(); I++) {
+ if (N->getOperand(I).getResNo() != I ||
+ N->getOperand(I).getNode() != FirstOp.getNode())
+ return SDValue(); // Operand I is not result I of that same interleave node.
+ }
+
+ SDValue InOp0 = FirstOp.getOperand(0);
+ if (!llvm::all_of(FirstOp->ops(),
+ [&InOp0](SDValue Op) { return Op == InOp0; }))
+ return SDValue(); // The interleave inputs are not all the same value.
+
+ // N concatenates, in order, every result of one interleave whose inputs are
+ // all the same value; if that value is a splat, N is just a wider splat.
+ if (SDValue Splat = DAG.getSplatValue(InOp0)) {
+ SDLoc DL(N);
+ EVT SubVecTy = InOp0.getValueType();
+ // Same scalar type; element count scaled by the number of concat operands.
+ EVT WideVecTy = EVT::getVectorVT(
+ *DAG.getContext(), SubVecTy.getScalarType(),
+ SubVecTy.getVectorElementCount() * N->getNumOperands());
+ return DAG.getSplat(WideVecTy, DL, Splat);
+ }
+
+ return SDValue(); // No fold: getSplatValue found no splat value for the input.
+}
+
// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of
// the operands is a SHUFFLE_VECTOR, and all other operands are also operands
// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR.
@@ -25397,6 +25429,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return DAG.getBuildVector(VT, SDLoc(N), Opnds);
}
+ if (SDValue V = combineConcatVectorInterleave(N, DAG))
+ return V;
+
// Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
// FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...).
if (SDValue V = combineConcatVectorOfScalars(N, DAG))
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
index b954863560899..7280114ca8aaf 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -541,20 +541,10 @@ define <vscale x 4 x i32> @interleave2_nxv2i32(<vscale x 2 x i32> %vec0, <vscale
}
define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
-; SVE-LABEL: interleave2_same_const_splat_nxv4i16:
-; SVE: // %bb.0:
-; SVE-NEXT: mov z0.d, #3 // =0x3
-; SVE-NEXT: zip2 z1.d, z0.d, z0.d
-; SVE-NEXT: zip1 z0.d, z0.d, z0.d
-; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
-; SVE-NEXT: ret
-;
-; SME2-LABEL: interleave2_same_const_splat_nxv4i16:
-; SME2: // %bb.0:
-; SME2-NEXT: mov z0.d, #3 // =0x3
-; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
-; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
-; SME2-NEXT: ret
+; CHECK-LABEL: interleave2_same_const_splat_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, #3 // =0x3
+; CHECK-NEXT: ret
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
ret <vscale x 4 x i16> %retval
}
@@ -581,22 +571,10 @@ define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
}
define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
-; SVE-LABEL: interleave2_same_nonconst_splat_nxv4i16:
-; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $w0 killed $w0 def $x0
-; SVE-NEXT: mov z0.d, x0
-; SVE-NEXT: zip2 z1.d, z0.d, z0.d
-; SVE-NEXT: zip1 z0.d, z0.d, z0.d
-; SVE-NEXT: uzp1 z0.s, z0.s, z1.s
-; SVE-NEXT: ret
-;
-; SME2-LABEL: interleave2_same_nonconst_splat_nxv4i16:
-; SME2: // %bb.0:
-; SME2-NEXT: // kill: def $w0 killed $w0 def $x0
-; SME2-NEXT: mov z0.d, x0
-; SME2-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
-; SME2-NEXT: uzp1 z0.s, z0.s, z1.s
-; SME2-NEXT: ret
+; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, w0
+; CHECK-NEXT: ret
%ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
%splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
@@ -633,75 +611,10 @@ define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %
}
define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
-; SVE-LABEL: interleave4_same_const_splat_nxv8i16:
-; SVE: // %bb.0:
-; SVE-NEXT: mov z0.d, #3 // =0x3
-; SVE-NEXT: zip1 z1.d, z0.d, z0.d
-; SVE-NEXT: zip1 z2.d, z1.d, z1.d
-; SVE-NEXT: zip2 z1.d, z1.d, z1.d
-; SVE-NEXT: uzp1 z2.s, z2.s, z0.s
-; SVE-NEXT: uzp1 z2.h, z2.h, z0.h
-; SVE-NEXT: uunpklo z2.s, z2.h
-; SVE-NEXT: uunpklo z2.d, z2.s
-; SVE-NEXT: uzp1 z1.s, z2.s, z1.s
-; SVE-NEXT: uzp1 z2.h, z1.h, z0.h
-; SVE-NEXT: zip2 z0.d, z0.d, z0.d
-; SVE-NEXT: uunpkhi z2.s, z2.h
-; SVE-NEXT: zip1 z3.d, z0.d, z0.d
-; SVE-NEXT: zip2 z0.d, z0.d, z0.d
-; SVE-NEXT: uunpkhi z2.d, z2.s
-; SVE-NEXT: uzp1 z2.s, z3.s, z2.s
-; SVE-NEXT: uzp1 z2.h, z1.h, z2.h
-; SVE-NEXT: uunpkhi z2.s, z2.h
-; SVE-NEXT: uunpklo z2.d, z2.s
-; SVE-NEXT: uzp1 z0.s, z2.s, z0.s
-; SVE-NEXT: uzp1 z0.h, z1.h, z0.h
-; SVE-NEXT: ret
-;
-; SME-ALL-LABEL: interleave4_same_const_splat_nxv8i16:
-; SME-ALL: // %bb.0:
-; SME-ALL-NEXT: mov z0.d, #3 // =0x3
-; SME-ALL-NEXT: zip { z0.d, z1.d }, z0.d, z0.d
-; SME-ALL-NEXT: zip { z2.d, z3.d }, z0.d, z0.d
-; SME-ALL-NEXT: uzp1 z4.s, z2.s, z0.s
-; SME-ALL-NEXT: uzp1 z4.h, z4.h, z0.h
-; SME-ALL-NEXT: uunpklo z4.s, z4.h
-; SME-ALL-NEXT: uunpklo z4.d, z4.s
-; SME-ALL-NEXT: uzp1 z2.s, z4.s, z3.s
-; SME-ALL-NEXT: uzp1 z3.h, z2.h, z0.h
-; SME-ALL-NEXT: zip { z0.d, z1.d }, z1.d, z1.d
-; SME-ALL-NEXT: uunpkhi z3.s, z3.h
-; SME-ALL-NEXT: uunpkhi z3.d, z3.s
-; SME-ALL-NEXT: uzp1 z3.s, z0.s, z3.s
-; SME-ALL-NEXT: uzp1 z3.h, z2.h, z3.h
-; SME-ALL-NEXT: uunpkhi z3.s, z3.h
-; SME-ALL-NEXT: uunpklo z3.d, z3.s
-; SME-ALL-NEXT: uzp1 z0.s, z3.s, z1.s
-; SME-ALL-NEXT: uzp1 z0.h, z2.h, z0.h
-; SME-ALL-NEXT: ret
-;
-; SME2-256-LABEL: interleave4_same_const_splat_nxv8i16:
-; SME2-256: // %bb.0:
-; SME2-256-NEXT: mov z0.d, #3 // =0x3
-; SME2-256-NEXT: mov z1.d, z0.d
-; SME2-256-NEXT: mov z2.d, z0.d
-; SME2-256-NEXT: mov z3.d, z0.d
-; SME2-256-NEXT: zip { z0.d - z3.d }, { z0.d - z3.d }
-; SME2-256-NEXT: uzp1 z4.s, z0.s, z0.s
-; SME2-256-NEXT: uzp1 z4.h, z4.h, z0.h
-; SME2-256-NEXT: uunpklo z4.s, z4.h
-; SME2-256-NEXT: uunpklo z4.d, z4.s
-; SME2-256-NEXT: uzp1 z4.s, z4.s, z1.s
-; SME2-256-NEXT: uzp1 z5.h, z4.h, z0.h
-; SME2-256-NEXT: uunpkhi z5.s, z5.h
-; SME2-256-NEXT: uunpkhi z5.d, z5.s
-; SME2-256-NEXT: uzp1 z5.s, z2.s, z5.s
-; SME2-256-NEXT: uzp1 z5.h, z4.h, z5.h
-; SME2-256-NEXT: uunpkhi z5.s, z5.h
-; SME2-256-NEXT: uunpklo z5.d, z5.s
-; SME2-256-NEXT: uzp1 z0.s, z5.s, z3.s
-; SME2-256-NEXT: uzp1 z0.h, z4.h, z0.h
-; SME2-256-NEXT: ret
+; CHECK-LABEL: interleave4_same_const_splat_nxv8i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, #3 // =0x3
+; CHECK-NEXT: ret
%retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
ret <vscale x 8 x i16> %retval
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index f3ba7fe33fa48..38d38f78c6054 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -1640,110 +1640,20 @@ define <8 x half> @vector_interleave8_v8f16_v1f16(<1 x half> %a, <1 x half> %b,
define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) {
; CHECK-LABEL: interleave4_const_splat_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vmv1r.v v9, v8
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: vmv1r.v v10, v8
-; CHECK-NEXT: add a2, a0, a1
-; CHECK-NEXT: vmv1r.v v11, v8
-; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vsseg4e16.v v8, (a0)
-; CHECK-NEXT: add a3, a2, a1
-; CHECK-NEXT: add a1, a3, a1
-; CHECK-NEXT: vle16.v v9, (a3)
-; CHECK-NEXT: vle16.v v10, (a2)
-; CHECK-NEXT: vle16.v v11, (a1)
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v11, 2
-; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: ret
;
; ZVBB-LABEL: interleave4_const_splat_v8i16:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: addi sp, sp, -16
-; ZVBB-NEXT: .cfi_def_cfa_offset 16
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: sub sp, sp, a0
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVBB-NEXT: vmv.v.i v8, 3
-; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: csrr a1, vlenb
-; ZVBB-NEXT: vmv1r.v v9, v8
-; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: vmv1r.v v10, v8
-; ZVBB-NEXT: add a2, a0, a1
-; ZVBB-NEXT: vmv1r.v v11, v8
-; ZVBB-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
-; ZVBB-NEXT: vsseg4e16.v v8, (a0)
-; ZVBB-NEXT: add a3, a2, a1
-; ZVBB-NEXT: add a1, a3, a1
-; ZVBB-NEXT: vle16.v v9, (a3)
-; ZVBB-NEXT: vle16.v v10, (a2)
-; ZVBB-NEXT: vle16.v v11, (a1)
-; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vi v9, v11, 2
-; ZVBB-NEXT: vslideup.vi v8, v10, 2
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT: vslideup.vi v8, v9, 4
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: add sp, sp, a0
-; ZVBB-NEXT: .cfi_def_cfa sp, 16
-; ZVBB-NEXT: addi sp, sp, 16
-; ZVBB-NEXT: .cfi_def_cfa_offset 0
+; ZVBB-NEXT: vmv.v.i v8, 3
; ZVBB-NEXT: ret
;
; ZIP-LABEL: interleave4_const_splat_v8i16:
; ZIP: # %bb.0:
-; ZIP-NEXT: addi sp, sp, -16
-; ZIP-NEXT: .cfi_def_cfa_offset 16
-; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: sub sp, sp, a0
-; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZIP-NEXT: vmv.v.i v8, 3
-; ZIP-NEXT: addi a0, sp, 16
-; ZIP-NEXT: csrr a1, vlenb
-; ZIP-NEXT: vmv1r.v v9, v8
-; ZIP-NEXT: srli a1, a1, 2
-; ZIP-NEXT: vmv1r.v v10, v8
-; ZIP-NEXT: add a2, a0, a1
-; ZIP-NEXT: vmv1r.v v11, v8
-; ZIP-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
-; ZIP-NEXT: vsseg4e16.v v8, (a0)
-; ZIP-NEXT: add a3, a2, a1
-; ZIP-NEXT: add a1, a3, a1
-; ZIP-NEXT: vle16.v v9, (a3)
-; ZIP-NEXT: vle16.v v10, (a2)
-; ZIP-NEXT: vle16.v v11, (a1)
-; ZIP-NEXT: vle16.v v8, (a0)
-; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZIP-NEXT: vslideup.vi v9, v11, 2
-; ZIP-NEXT: vslideup.vi v8, v10, 2
; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZIP-NEXT: vslideup.vi v8, v9, 4
-; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: add sp, sp, a0
-; ZIP-NEXT: .cfi_def_cfa sp, 16
-; ZIP-NEXT: addi sp, sp, 16
-; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: vmv.v.i v8, 3
; ZIP-NEXT: ret
%retval = call <8 x i16> @llvm.vector.interleave4.v8i16(<2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3), <2 x i16> splat(i16 3))
ret <8 x i16> %retval
@@ -1752,110 +1662,20 @@ define <8 x i16> @interleave4_const_splat_v8i16(<2 x i16> %a) {
define <8 x i16> @interleave4_same_nonconst_splat_v8i16(i16 %a) {
; CHECK-LABEL: interleave4_same_nonconst_splat_v8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vmv1r.v v9, v8
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: vmv1r.v v10, v8
-; CHECK-NEXT: add a2, a0, a1
-; CHECK-NEXT: vmv1r.v v11, v8
-; CHECK-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vsseg4e16.v v8, (a0)
-; CHECK-NEXT: add a3, a2, a1
-; CHECK-NEXT: add a1, a3, a1
-; CHECK-NEXT: vle16.v v9, (a3)
-; CHECK-NEXT: vle16.v v10, (a2)
-; CHECK-NEXT: vle16.v v11, (a1)
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v11, 2
-; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v8, v9, 4
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: ret
;
; ZVBB-LABEL: interleave4_same_nonconst_splat_v8i16:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: addi sp, sp, -16
-; ZVBB-NEXT: .cfi_def_cfa_offset 16
-; ZVBB-NEXT: csrr a1, vlenb
-; ZVBB-NEXT: sub sp, sp, a1
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZVBB-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVBB-NEXT: vmv.v.x v8, a0
-; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: csrr a1, vlenb
-; ZVBB-NEXT: vmv1r.v v9, v8
-; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: vmv1r.v v10, v8
-; ZVBB-NEXT: add a2, a0, a1
-; ZVBB-NEXT: vmv1r.v v11, v8
-; ZVBB-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
-; ZVBB-NEXT: vsseg4e16.v v8, (a0)
-; ZVBB-NEXT: add a3, a2, a1
-; ZVBB-NEXT: add a1, a3, a1
-; ZVBB-NEXT: vle16.v v9, (a3)
-; ZVBB-NEXT: vle16.v v10, (a2)
-; ZVBB-NEXT: vle16.v v11, (a1)
-; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVBB-NEXT: vslideup.vi v9, v11, 2
-; ZVBB-NEXT: vslideup.vi v8, v10, 2
; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVBB-NEXT: vslideup.vi v8, v9, 4
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: add sp, sp, a0
-; ZVBB-NEXT: .cfi_def_cfa sp, 16
-; ZVBB-NEXT: addi sp, sp, 16
-; ZVBB-NEXT: .cfi_def_cfa_offset 0
+; ZVBB-NEXT: vmv.v.x v8, a0
; ZVBB-NEXT: ret
;
; ZIP-LABEL: interleave4_same_nonconst_splat_v8i16:
; ZIP: # %bb.0:
-; ZIP-NEXT: addi sp, sp, -16
-; ZIP-NEXT: .cfi_def_cfa_offset 16
-; ZIP-NEXT: csrr a1, vlenb
-; ZIP-NEXT: sub sp, sp, a1
-; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
-; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZIP-NEXT: vmv.v.x v8, a0
-; ZIP-NEXT: addi a0, sp, 16
-; ZIP-NEXT: csrr a1, vlenb
-; ZIP-NEXT: vmv1r.v v9, v8
-; ZIP-NEXT: srli a1, a1, 2
-; ZIP-NEXT: vmv1r.v v10, v8
-; ZIP-NEXT: add a2, a0, a1
-; ZIP-NEXT: vmv1r.v v11, v8
-; ZIP-NEXT: vsetvli a3, zero, e16, mf4, ta, ma
-; ZIP-NEXT: vsseg4e16.v v8, (a0)
-; ZIP-NEXT: add a3, a2, a1
-; ZIP-NEXT: add a1, a3, a1
-; ZIP-NEXT: vle16.v v9, (a3)
-; ZIP-NEXT: vle16.v v10, (a2)
-; ZIP-NEXT: vle16.v v11, (a1)
-; ZIP-NEXT: vle16.v v8, (a0)
-; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZIP-NEXT: vslideup.vi v9, v11, 2
-; ZIP-NEXT: vslideup.vi v8, v10, 2
; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZIP-NEXT: vslideup.vi v8, v9, 4
-; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: add sp, sp, a0
-; ZIP-NEXT: .cfi_def_cfa sp, 16
-; ZIP-NEXT: addi sp, sp, 16
-; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: vmv.v.x v8, a0
; ZIP-NEXT: ret
%ins = insertelement <2 x i16> poison, i16 %a, i32 0
%splat = shufflevector <2 x i16> %ins, <2 x i16> poison, <2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
index 7a977ff9b4e3a..ec311cfa3cdc5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -14949,46 +14949,17 @@ define <vscale x 16 x double> @vector_interleave_nxv16f64_nxv2f64(<vscale x 2 x
}
define <vscale x 4 x i16> @interleave2_same_const_splat_nxv4i16() {
-; V-LABEL: interleave2_same_const_splat_nxv4i16:
-; V: # %bb.0:
-; V-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; V-NEXT: vmv.v.i v9, 3
-; V-NEXT: li a0, 3
-; V-NEXT: vmv.v.i v10, -1
-; V-NEXT: vwaddu.vx v8, v9, a0
-; V-NEXT: vwmaccu.vx v8, a0, v10
-; V-NEXT: csrr a0, vlenb
-; V-NEXT: srli a0, a0, 2
-; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; V-NEXT: vslidedown.vx v9, v8, a0
-; V-NEXT: vslideup.vx v8, v9, a0
-; V-NEXT: ret
+; CHECK-LABEL: interleave2_same_const_splat_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: ret
;
; ZVBB-LABEL: interleave2_same_const_splat_nxv4i16:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
; ZVBB-NEXT: vmv.v.i v8, 3
-; ZVBB-NEXT: li a0, 3
-; ZVBB-NEXT: vwsll.vi v9, v8, 16
-; ZVBB-NEXT: vwaddu.wx v8, v9, a0
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: srli a0, a0, 2
-; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVBB-NEXT: vslidedown.vx v9, v8, a0
-; ZVBB-NEXT: vslideup.vx v8, v9, a0
; ZVBB-NEXT: ret
-;
-; ZIP-LABEL: interleave2_same_const_splat_nxv4i16:
-; ZIP: # %bb.0:
-; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZIP-NEXT: vmv.v.i v9, 3
-; ZIP-NEXT: srli a0, a0, 2
-; ZIP-NEXT: ri.vzip2b.vv v10, v9, v9
-; ZIP-NEXT: ri.vzip2a.vv v8, v9, v9
-; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZIP-NEXT: vslideup.vx v8, v10, a0
-; ZIP-NEXT: ret
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
ret <vscale x 4 x i16> %retval
}
@@ -15056,44 +15027,17 @@ define <vscale x 4 x i16> @interleave2_diff_const_splat_nxv4i16() {
}
define <vscale x 4 x i16> @interleave2_same_nonconst_splat_nxv4i16(i16 %a) {
-; V-LABEL: interleave2_same_nonconst_splat_nxv4i16:
-; V: # %bb.0:
-; V-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; V-NEXT: vmv.v.x v9, a0
-; V-NEXT: vmv.v.i v10, -1
-; V-NEXT: vwaddu.vx v8, v9, a0
-; V-NEXT: vwmaccu.vx v8, a0, v10
-; V-NEXT: csrr a0, vlenb
-; V-NEXT: srli a0, a0, 2
-; V-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; V-NEXT: vslidedown.vx v9, v8, a0
-; V-NEXT: vslideup.vx v8, v9, a0
-; V-NEXT: ret
+; CHECK-LABEL: interleave2_same_nonconst_splat_nxv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: ret
;
; ZVBB-LABEL: interleave2_same_nonconst_splat_nxv4i16:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZVBB-NEXT: vmv.v.x v8, a0
-; ZVBB-NEXT: vwsll.vi v9, v8, 16
-; ZVBB-NEXT: vwaddu.wx v8, v9, a0
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: srli a0, a0, 2
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZVBB-NEXT: vslidedown.vx v9, v8, a0
-; ZVBB-NEXT: vslideup.vx v8, v9, a0
+; ZVBB-NEXT: vmv.v.x v8, a0
; ZVBB-NEXT: ret
-;
-; ZIP-LABEL: interleave2_same_nonconst_splat_nxv4i16:
-; ZIP: # %bb.0:
-; ZIP-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; ZIP-NEXT: vmv.v.x v9, a0
-; ZIP-NEXT: csrr a0, vlenb
-; ZIP-NEXT: srli a0, a0, 2
-; ZIP-NEXT: ri.vzip2b.vv v10, v9, v9
-; ZIP-NEXT: ri.vzip2a.vv v8, v9, v9
-; ZIP-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; ZIP-NEXT: vslideup.vx v8, v10, a0
-; ZIP-NEXT: ret
%ins = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
%splat = shufflevector <vscale x 2 x i16> %ins, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
%retval = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %splat, <vscale x 2 x i16> %splat)
@@ -15171,78 +15115,14 @@ define <vscale x 4 x i16> @interleave2_diff_nonconst_splat_nxv4i16(i16 %a, i16 %
define <vscale x 8 x i16> @interleave4_same_const_splat_nxv8i16() {
; CHECK-LABEL: interleave4_same_const_splat_nxv8i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vmv1r.v v9, v8
-; CHECK-NEXT: srli a2, a1, 1
-; CHECK-NEXT: vmv1r.v v10, v8
-; CHECK-NEXT: add a3, a0, a2
-; CHECK-NEXT: vmv1r.v v11, v8
-; CHECK-NEXT: vsseg4e16.v v8, (a0)
-; CHECK-NEXT: add a4, a3, a2
-; CHECK-NEXT: add a2, a4, a2
-; CHECK-NEXT: vle16.v v9, (a4)
-; CHECK-NEXT: vle16.v v8, (a2)
-; CHECK-NEXT: srli a1, a1, 2
-; CHECK-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v9, v8, a1
-; CHECK-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vle16.v v10, (a3)
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vslideup.vx v8, v10, a1
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: .cfi_def_cfa sp, 16
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: ret
;
; ZVBB-LABEL: interleave4_same_const_splat_nxv8i16:
; ZVBB: # %bb.0:
-; ZVBB-NEXT: addi sp, sp, -16
-; ZVBB-NEXT: .cfi_def_cfa_offset 16
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 1
-; ZVBB-NEXT: sub sp, sp, a0
-; ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
-; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
; ZVBB-NEXT: vmv.v.i v8, 3
-; ZVBB-NEXT: addi a0, sp, 16
-; ZVBB-NEXT: csrr a1, vlenb
-; ZVBB-NEXT: vmv1r.v v9, v8
-; ZVBB-NEXT: srli a2, a1, 1
-; ZVBB-NEXT: vmv1r.v v10, v8
-; ZVBB-NEXT: add a3, a0, a2
-; ZVBB-NEXT: vmv1r.v v11, v8
-; ZVBB-NEXT: vsseg4e16.v v8, (a0)
-; ZVBB-NEXT: add a4, a3, a2
-; ZVBB-NEXT: add a2, a4, a2
-; ZVBB-NEXT: vle16.v v9, (a4)
-; ZVBB-NEXT: vle16.v v8, (a2)
-; ZVBB-NEXT: srli a1, a1, 2
-; ZVBB-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v9, v8, a1
-; ZVBB-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
-; ZVBB-NEXT: vle16.v v10, (a3)
-; ZVBB-NEXT: vle16.v v8, (a0)
-; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; ZVBB-NEXT: vslideup.vx v8, v10, a1
-; ZVBB-NEXT: csrr a0, vlenb
-; ZVBB-NEXT: slli a0, a0, 1
-; ZVBB-NEXT: add sp, sp, a0
-; ZVBB-NEXT: .cfi_def_cfa sp, 16
-; ZVBB-NEXT: addi sp, sp, 16
-; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
%retval = call <vscale x 8 x i16> @llvm.vector.interleave4.nxv8i16(<vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3), <vscale x 2 x i16> splat(i16 3))
ret <vscale x 8 x i16> %retval
More information about the llvm-commits
mailing list