[llvm] b6dff56 - [RISCV] Add vizip check lines to (de)interleave tests
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 19:50:52 PDT 2025
Author: Philip Reames
Date: 2025-04-17T19:50:43-07:00
New Revision: b6dff5660092e4814fa2e2cc129ba0d05ce49a52
URL: https://github.com/llvm/llvm-project/commit/b6dff5660092e4814fa2e2cc129ba0d05ce49a52
DIFF: https://github.com/llvm/llvm-project/commit/b6dff5660092e4814fa2e2cc129ba0d05ce49a52.diff
LOG: [RISCV] Add vizip check lines to (de)interleave tests
This reduces churn in advance of a change that makes better use of these.
Note that for very short fixed-length shuffles, we already use the
vizip family of instructions - but mostly in the form of zipeven/zipodd.
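As a minimal sketch of the existing behavior (taken from the updated
fixed-vectors-shuffle-deinterleave2.ll checks; the ZIP prefix corresponds to
the new +experimental-xrivosvizip run line, and the IR body is reconstructed
from the surrounding tests, so treat it as illustrative), an odd-element
deinterleave of two short sources already selects ri.vzipodd.vv:

  define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) {
  entry:
    %0 = load <2 x i64>, ptr %in0, align 8
    %1 = load <2 x i64>, ptr %in1, align 8
    ; odd elements of the interleaved pair -> zipodd
    %shuffle.i5 = shufflevector <2 x i64> %0, <2 x i64> %1, <2 x i32> <i32 1, i32 3>
    store <2 x i64> %shuffle.i5, ptr %out, align 8
    ret void
  }

  ; ZIP:       # %bb.0: # %entry
  ; ZIP-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
  ; ZIP-NEXT:    vle64.v v8, (a0)
  ; ZIP-NEXT:    vle64.v v9, (a1)
  ; ZIP-NEXT:    ri.vzipodd.vv v10, v8, v9
  ; ZIP-NEXT:    vse64.v v10, (a2)
  ; ZIP-NEXT:    ret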
Added:
Modified:
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
index 9279e0a4d3a6c..c65d7c36a2198 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll
@@ -5,6 +5,9 @@
; RUN: llc < %s -mtriple=riscv64 -mattr=+f,+zve32f,+zvfh,+zvl256b \
; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
; RUN: | FileCheck %s --check-prefixes=CHECK,ZVE32F
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+zvl256b,+experimental-xrivosvizip \
+; RUN: -lower-interleaved-accesses=false -verify-machineinstrs \
+; RUN: | FileCheck %s --check-prefixes=CHECK,ZIP
define void @vnsrl_0_i8(ptr %in, ptr %out) {
; CHECK-LABEL: vnsrl_0_i8:
@@ -56,6 +59,15 @@ define void @vnsrl_0_i16(ptr %in, ptr %out) {
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i16:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vse16.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i16>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -81,6 +93,15 @@ define void @vnsrl_16_i16(ptr %in, ptr %out) {
; ZVE32F-NEXT: vnsrl.wi v8, v8, 16
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_16_i16:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZIP-NEXT: vse16.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i16>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x i16> %0, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -106,6 +127,15 @@ define void @vnsrl_0_half(ptr %in, ptr %out) {
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_half:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vse16.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x half>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -131,6 +161,15 @@ define void @vnsrl_16_half(ptr %in, ptr %out) {
; ZVE32F-NEXT: vnsrl.wi v8, v8, 16
; ZVE32F-NEXT: vse16.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_16_half:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZIP-NEXT: vse16.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x half>, ptr %in, align 2
%shuffle.i5 = shufflevector <8 x half> %0, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -157,6 +196,15 @@ define void @vnsrl_0_i32(ptr %in, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vse32.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i32:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vse32.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
@@ -185,6 +233,16 @@ define void @vnsrl_32_i32(ptr %in, ptr %out) {
; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_32_i32:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vnsrl.wx v8, v8, a0
+; ZIP-NEXT: vse32.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x i32> %0, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
@@ -211,6 +269,15 @@ define void @vnsrl_0_float(ptr %in, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vse32.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_float:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vse32.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x float>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 0, i32 2>
@@ -239,6 +306,16 @@ define void @vnsrl_32_float(ptr %in, ptr %out) {
; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_32_float:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vnsrl.wx v8, v8, a0
+; ZIP-NEXT: vse32.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x float>, ptr %in, align 4
%shuffle.i5 = shufflevector <4 x float> %0, <4 x float> poison, <2 x i32> <i32 1, i32 3>
@@ -264,6 +341,16 @@ define void @vnsrl_0_i64(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: vse64.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 0, i32 2>
@@ -290,6 +377,16 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_64_i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse64.v v10, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i64>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x i64> %0, <4 x i64> poison, <2 x i32> <i32 1, i32 3>
@@ -315,6 +412,16 @@ define void @vnsrl_0_double(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_double:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: vse64.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 0, i32 2>
@@ -341,6 +448,16 @@ define void @vnsrl_64_double(ptr %in, ptr %out) {
; ZVE32F-NEXT: sd a2, 0(a1)
; ZVE32F-NEXT: sd a0, 8(a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_64_double:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse64.v v10, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x double>, ptr %in, align 8
%shuffle.i5 = shufflevector <4 x double> %0, <4 x double> poison, <2 x i32> <i32 1, i32 3>
@@ -444,6 +561,15 @@ define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
; ZVE32F-NEXT: vnsrl.wi v8, v8, 0
; ZVE32F-NEXT: vse8.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i8_single_src:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vle8.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vse8.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in, align 1
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -469,6 +595,15 @@ define void @vnsrl_8_i8_single_src(ptr %in, ptr %out) {
; ZVE32F-NEXT: vnsrl.wi v8, v8, 8
; ZVE32F-NEXT: vse8.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_8_i8_single_src:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vle8.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 8
+; ZIP-NEXT: vse8.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in, align 1
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -496,6 +631,16 @@ define void @vnsrl_0_i8_single_wideuse(ptr %in, ptr %out) {
; ZVE32F-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
; ZVE32F-NEXT: vse8.v v8, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i8_single_wideuse:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vle8.v v8, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vse8.v v8, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in, align 1
%shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -531,6 +676,18 @@ define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) {
; ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; ZVE32F-NEXT: vse32.v v16, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i32_single_src_m8:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: li a2, 64
+; ZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZIP-NEXT: vnsrl.wi v16, v8, 0
+; ZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZIP-NEXT: vse32.v v16, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <64 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -564,6 +721,18 @@ define void @vnsrl_0_i32_single_src_m8_2(ptr %in, ptr %out) {
; ZVE32F-NEXT: vcompress.vm v16, v8, v24
; ZVE32F-NEXT: vse32.v v16, (a1)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i32_single_src_m8_2:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: li a2, 64
+; ZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetvli zero, a0, e32, m4, ta, ma
+; ZIP-NEXT: vnsrl.wi v16, v8, 0
+; ZIP-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; ZIP-NEXT: vse32.v v16, (a1)
+; ZIP-NEXT: ret
entry:
%0 = load <64 x i32>, ptr %in, align 4
%shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -597,6 +766,19 @@ define void @vnsrl_0_i8_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v9, v8, 4
; ZVE32F-NEXT: vse8.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i8_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vle8.v v8, (a1)
+; ZIP-NEXT: vle8.v v9, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vnsrl.wi v9, v9, 0
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v8, 4
+; ZIP-NEXT: vse8.v v9, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in0, align 1
%1 = load <8 x i8>, ptr %in1, align 1
@@ -631,6 +813,19 @@ define void @vnsrl_8_8_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v9, v8, 4
; ZVE32F-NEXT: vse8.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_8_8_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vle8.v v8, (a1)
+; ZIP-NEXT: vle8.v v9, (a0)
+; ZIP-NEXT: vsetivli zero, 4, e8, mf8, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 8
+; ZIP-NEXT: vnsrl.wi v9, v9, 8
+; ZIP-NEXT: vsetivli zero, 8, e8, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v8, 4
+; ZIP-NEXT: vse8.v v9, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <8 x i8>, ptr %in0, align 1
%1 = load <8 x i8>, ptr %in1, align 1
@@ -665,6 +860,19 @@ define void @vnsrl_0_i16_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v9, v8, 2
; ZVE32F-NEXT: vse16.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i16_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vle16.v v8, (a1)
+; ZIP-NEXT: vle16.v v9, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vnsrl.wi v9, v9, 0
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v8, 2
+; ZIP-NEXT: vse16.v v9, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i16>, ptr %in0, align 2
%1 = load <4 x i16>, ptr %in1, align 2
@@ -699,6 +907,19 @@ define void @vnsrl_16_i16_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v9, v8, 2
; ZVE32F-NEXT: vse16.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_16_i16_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vle16.v v8, (a1)
+; ZIP-NEXT: vle16.v v9, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZIP-NEXT: vnsrl.wi v9, v9, 16
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v8, 2
+; ZIP-NEXT: vse16.v v9, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x i16>, ptr %in0, align 2
%1 = load <4 x i16>, ptr %in1, align 2
@@ -733,6 +954,19 @@ define void @vnsrl_0_half_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v9, v8, 2
; ZVE32F-NEXT: vse16.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_half_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vle16.v v8, (a1)
+; ZIP-NEXT: vle16.v v9, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 0
+; ZIP-NEXT: vnsrl.wi v9, v9, 0
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v8, 2
+; ZIP-NEXT: vse16.v v9, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x half>, ptr %in0, align 2
%1 = load <4 x half>, ptr %in1, align 2
@@ -767,6 +1001,19 @@ define void @vnsrl_16_half_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v9, v8, 2
; ZVE32F-NEXT: vse16.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_16_half_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vle16.v v8, (a1)
+; ZIP-NEXT: vle16.v v9, (a0)
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vnsrl.wi v8, v8, 16
+; ZIP-NEXT: vnsrl.wi v9, v9, 16
+; ZIP-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v9, v8, 2
+; ZIP-NEXT: vse16.v v9, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <4 x half>, ptr %in0, align 2
%1 = load <4 x half>, ptr %in1, align 2
@@ -793,6 +1040,15 @@ define void @vnsrl_0_i32_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vse32.v v8, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i32_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vle32.v v9, (a1)
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: vse32.v v8, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x i32>, ptr %in0, align 4
%1 = load <2 x i32>, ptr %in1, align 4
@@ -821,6 +1077,15 @@ define void @vnsrl_32_i32_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_32_i32_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vle32.v v9, (a1)
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse32.v v10, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x i32>, ptr %in0, align 4
%1 = load <2 x i32>, ptr %in1, align 4
@@ -847,6 +1112,15 @@ define void @vnsrl_0_float_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v8, v9, 1
; ZVE32F-NEXT: vse32.v v8, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_float_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vle32.v v9, (a1)
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: vse32.v v8, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x float>, ptr %in0, align 4
%1 = load <2 x float>, ptr %in1, align 4
@@ -875,6 +1149,15 @@ define void @vnsrl_32_float_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslidedown.vi v9, v8, 1, v0.t
; ZVE32F-NEXT: vse32.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_32_float_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: vle32.v v9, (a1)
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse32.v v10, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x float>, ptr %in0, align 4
%1 = load <2 x float>, ptr %in1, align 4
@@ -901,6 +1184,15 @@ define void @vnsrl_0_i64_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslideup.vi v8, v9, 2
; ZVE32F-NEXT: vse32.v v8, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_i64_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vle64.v v9, (a1)
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: vse64.v v8, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x i64>, ptr %in0, align 8
%1 = load <2 x i64>, ptr %in1, align 8
@@ -929,6 +1221,15 @@ define void @vnsrl_64_i64_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: vslidedown.vi v9, v8, 2, v0.t
; ZVE32F-NEXT: vse32.v v9, (a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_64_i64_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vle64.v v9, (a1)
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse64.v v10, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x i64>, ptr %in0, align 8
%1 = load <2 x i64>, ptr %in1, align 8
@@ -954,6 +1255,15 @@ define void @vnsrl_0_double_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: sd a0, 0(a2)
; ZVE32F-NEXT: sd a1, 8(a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_0_double_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vle64.v v9, (a1)
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: vse64.v v8, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x double>, ptr %in0, align 8
%1 = load <2 x double>, ptr %in1, align 8
@@ -980,6 +1290,15 @@ define void @vnsrl_64_double_two_source(ptr %in0, ptr %in1, ptr %out) {
; ZVE32F-NEXT: sd a0, 0(a2)
; ZVE32F-NEXT: sd a1, 8(a2)
; ZVE32F-NEXT: ret
+;
+; ZIP-LABEL: vnsrl_64_double_two_source:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: vle64.v v8, (a0)
+; ZIP-NEXT: vle64.v v9, (a1)
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vse64.v v10, (a2)
+; ZIP-NEXT: ret
entry:
%0 = load <2 x double>, ptr %in0, align 8
%1 = load <2 x double>, ptr %in1, align 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 6dd6d51862088..f6b5a35aa06d6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,V,RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck %s --check-prefixes=CHECK,V,RV64
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh,+experimental-xrivosvizip | FileCheck %s --check-prefixes=CHECK,ZIP
; Integers
@@ -71,50 +72,88 @@ ret {<4 x i32>, <4 x i32>} %retval
}
define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2i64_v4i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v0, 1
-; CHECK-NEXT: vmv1r.v v9, v10
-; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vslideup.vi v8, v10, 1
-; CHECK-NEXT: ret
+; V-LABEL: vector_deinterleave_v2i64_v4i64:
+; V: # %bb.0:
+; V-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; V-NEXT: vslidedown.vi v10, v8, 2
+; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; V-NEXT: vmv.v.i v0, 1
+; V-NEXT: vmv1r.v v9, v10
+; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
+; V-NEXT: vslideup.vi v8, v10, 1
+; V-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave_v2i64_v4i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v9, v8, v10
+; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: ret
%retval = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec)
ret {<2 x i64>, <2 x i64>} %retval
}
define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) {
-; CHECK-LABEL: vector_deinterleave_v4i64_v8i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 8
-; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v8, 4
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 2
-; CHECK-NEXT: vmv2r.v v12, v8
-; CHECK-NEXT: vmv.v.i v11, 12
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT: vslideup.vi v14, v16, 2
-; CHECK-NEXT: vslideup.vi v14, v16, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vslidedown.vi v12, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vmerge.vvm v12, v12, v14, v0
-; CHECK-NEXT: vslidedown.vi v14, v8, 1
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vslidedown.vi v14, v8, 2, v0.t
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv2r.v v8, v16
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vmerge.vvm v10, v14, v8, v0
-; CHECK-NEXT: vmv2r.v v8, v12
-; CHECK-NEXT: ret
+; V-LABEL: vector_deinterleave_v4i64_v8i64:
+; V: # %bb.0:
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.v.i v0, 8
+; V-NEXT: vsetivli zero, 4, e64, m4, ta, ma
+; V-NEXT: vslidedown.vi v16, v8, 4
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.v.i v10, 2
+; V-NEXT: vmv2r.v v12, v8
+; V-NEXT: vmv.v.i v11, 12
+; V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; V-NEXT: vslideup.vi v14, v16, 2
+; V-NEXT: vslideup.vi v14, v16, 1, v0.t
+; V-NEXT: vmv1r.v v0, v10
+; V-NEXT: vslidedown.vi v12, v8, 1, v0.t
+; V-NEXT: vmv1r.v v0, v11
+; V-NEXT: vmerge.vvm v12, v12, v14, v0
+; V-NEXT: vslidedown.vi v14, v8, 1
+; V-NEXT: vmv1r.v v0, v10
+; V-NEXT: vslidedown.vi v14, v8, 2, v0.t
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.v.i v0, 4
+; V-NEXT: vmv2r.v v8, v16
+; V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; V-NEXT: vslideup.vi v8, v16, 1, v0.t
+; V-NEXT: vmv1r.v v0, v11
+; V-NEXT: vmerge.vvm v10, v14, v8, v0
+; V-NEXT: vmv2r.v v8, v12
+; V-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave_v4i64_v8i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v12, v8, 1
+; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; ZIP-NEXT: vmv.v.i v0, 2
+; ZIP-NEXT: vmv.v.i v14, 12
+; ZIP-NEXT: vsetivli zero, 4, e64, m4, ta, ma
+; ZIP-NEXT: vslidedown.vi v16, v8, 4
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; ZIP-NEXT: vslidedown.vi v12, v8, 2, v0.t
+; ZIP-NEXT: ri.vzip2a.vv v18, v8, v10
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v8, v16, 2
+; ZIP-NEXT: vmv1r.v v0, v14
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; ZIP-NEXT: ri.vzip2a.vv v12, v16, v8, v0.t
+; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; ZIP-NEXT: vmv.v.i v0, 8
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; ZIP-NEXT: vslideup.vi v8, v16, 2
+; ZIP-NEXT: vslideup.vi v8, v16, 1, v0.t
+; ZIP-NEXT: vmv1r.v v0, v14
+; ZIP-NEXT: vmerge.vvm v8, v18, v8, v0
+; ZIP-NEXT: vmv2r.v v10, v12
+; ZIP-NEXT: ret
%retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec)
ret {<4 x i64>, <4 x i64>} %retval
}
@@ -408,6 +447,97 @@ define {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @v
; RV64-NEXT: addi sp, sp, 64
; RV64-NEXT: .cfi_def_cfa_offset 0
; RV64-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave7_v14i8_v2i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -64
+; ZIP-NEXT: .cfi_def_cfa_offset 64
+; ZIP-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; ZIP-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; ZIP-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; ZIP-NEXT: .cfi_offset ra, -8
+; ZIP-NEXT: .cfi_offset s0, -16
+; ZIP-NEXT: .cfi_offset s1, -24
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 2
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 4 * vlenb
+; ZIP-NEXT: addi a0, sp, 32
+; ZIP-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
+; ZIP-NEXT: csrr s1, vlenb
+; ZIP-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v11, v8, 10
+; ZIP-NEXT: vslidedown.vi v10, v8, 8
+; ZIP-NEXT: vslidedown.vi v9, v8, 2
+; ZIP-NEXT: srli s0, s1, 3
+; ZIP-NEXT: add a0, s0, s0
+; ZIP-NEXT: vsetvli zero, a0, e8, mf2, tu, ma
+; ZIP-NEXT: vslideup.vx v10, v11, s0
+; ZIP-NEXT: vmv1r.v v11, v8
+; ZIP-NEXT: vslideup.vx v11, v9, s0
+; ZIP-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 12
+; ZIP-NEXT: srli a0, s1, 2
+; ZIP-NEXT: add a1, a0, s0
+; ZIP-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; ZIP-NEXT: vslideup.vx v10, v9, a0
+; ZIP-NEXT: csrr a2, vlenb
+; ZIP-NEXT: slli a2, a2, 1
+; ZIP-NEXT: add a2, sp, a2
+; ZIP-NEXT: addi a2, a2, 32
+; ZIP-NEXT: vs1r.v v10, (a2) # vscale x 8-byte Folded Spill
+; ZIP-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v9, v8, 4
+; ZIP-NEXT: vsetvli zero, a1, e8, mf2, tu, ma
+; ZIP-NEXT: vslideup.vx v11, v9, a0
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: add a0, sp, a0
+; ZIP-NEXT: addi a0, a0, 32
+; ZIP-NEXT: vs1r.v v11, (a0) # vscale x 8-byte Folded Spill
+; ZIP-NEXT: li a1, 3
+; ZIP-NEXT: mv a0, s0
+; ZIP-NEXT: call __muldi3
+; ZIP-NEXT: add s0, a0, s0
+; ZIP-NEXT: addi a1, sp, 32
+; ZIP-NEXT: vl1r.v v8, (a1) # vscale x 8-byte Folded Reload
+; ZIP-NEXT: vsetivli zero, 2, e8, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v8, v8, 6
+; ZIP-NEXT: srli s1, s1, 1
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 32
+; ZIP-NEXT: vl1r.v v9, (a1) # vscale x 8-byte Folded Reload
+; ZIP-NEXT: vsetvli zero, s0, e8, mf2, ta, ma
+; ZIP-NEXT: vslideup.vx v9, v8, a0
+; ZIP-NEXT: add a0, s1, s1
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: slli a1, a1, 1
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 32
+; ZIP-NEXT: vl1r.v v8, (a1) # vscale x 8-byte Folded Reload
+; ZIP-NEXT: vsetvli zero, a0, e8, m1, ta, ma
+; ZIP-NEXT: vslideup.vx v9, v8, s1
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a1, a0, 1
+; ZIP-NEXT: add a0, a1, a0
+; ZIP-NEXT: add a0, sp, a0
+; ZIP-NEXT: addi a0, a0, 32
+; ZIP-NEXT: vs1r.v v9, (a0)
+; ZIP-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
+; ZIP-NEXT: vlseg7e8.v v8, (a0)
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 2
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 64
+; ZIP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; ZIP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; ZIP-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; ZIP-NEXT: .cfi_restore ra
+; ZIP-NEXT: .cfi_restore s0
+; ZIP-NEXT: .cfi_restore s1
+; ZIP-NEXT: addi sp, sp, 64
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} @llvm.vector.deinterleave7.v14i8(<14 x i8> %v)
ret {<2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>, <2 x i8>} %res
}
@@ -479,50 +609,88 @@ ret {<4 x float>, <4 x float>} %retval
}
define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) {
-; CHECK-LABEL: vector_deinterleave_v2f64_v4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; CHECK-NEXT: vslidedown.vi v10, v8, 2
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT: vmv.v.i v0, 1
-; CHECK-NEXT: vmv1r.v v9, v10
-; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t
-; CHECK-NEXT: vslideup.vi v8, v10, 1
-; CHECK-NEXT: ret
+; V-LABEL: vector_deinterleave_v2f64_v4f64:
+; V: # %bb.0:
+; V-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; V-NEXT: vslidedown.vi v10, v8, 2
+; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu
+; V-NEXT: vmv.v.i v0, 1
+; V-NEXT: vmv1r.v v9, v10
+; V-NEXT: vslidedown.vi v9, v8, 1, v0.t
+; V-NEXT: vslideup.vi v8, v10, 1
+; V-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave_v2f64_v4f64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v9, v8, v10
+; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: ret
%retval = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec)
ret {<2 x double>, <2 x double>} %retval
}
define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double> %vec) {
-; CHECK-LABEL: vector_deinterleave_v4f64_v8f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 8
-; CHECK-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v16, v8, 4
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v10, 2
-; CHECK-NEXT: vmv2r.v v12, v8
-; CHECK-NEXT: vmv.v.i v11, 12
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT: vslideup.vi v14, v16, 2
-; CHECK-NEXT: vslideup.vi v14, v16, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vslidedown.vi v12, v8, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vmerge.vvm v12, v12, v14, v0
-; CHECK-NEXT: vslidedown.vi v14, v8, 1
-; CHECK-NEXT: vmv1r.v v0, v10
-; CHECK-NEXT: vslidedown.vi v14, v8, 2, v0.t
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v0, 4
-; CHECK-NEXT: vmv2r.v v8, v16
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t
-; CHECK-NEXT: vmv1r.v v0, v11
-; CHECK-NEXT: vmerge.vvm v10, v14, v8, v0
-; CHECK-NEXT: vmv2r.v v8, v12
-; CHECK-NEXT: ret
+; V-LABEL: vector_deinterleave_v4f64_v8f64:
+; V: # %bb.0:
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.v.i v0, 8
+; V-NEXT: vsetivli zero, 4, e64, m4, ta, ma
+; V-NEXT: vslidedown.vi v16, v8, 4
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.v.i v10, 2
+; V-NEXT: vmv2r.v v12, v8
+; V-NEXT: vmv.v.i v11, 12
+; V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; V-NEXT: vslideup.vi v14, v16, 2
+; V-NEXT: vslideup.vi v14, v16, 1, v0.t
+; V-NEXT: vmv1r.v v0, v10
+; V-NEXT: vslidedown.vi v12, v8, 1, v0.t
+; V-NEXT: vmv1r.v v0, v11
+; V-NEXT: vmerge.vvm v12, v12, v14, v0
+; V-NEXT: vslidedown.vi v14, v8, 1
+; V-NEXT: vmv1r.v v0, v10
+; V-NEXT: vslidedown.vi v14, v8, 2, v0.t
+; V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; V-NEXT: vmv.v.i v0, 4
+; V-NEXT: vmv2r.v v8, v16
+; V-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; V-NEXT: vslideup.vi v8, v16, 1, v0.t
+; V-NEXT: vmv1r.v v0, v11
+; V-NEXT: vmerge.vvm v10, v14, v8, v0
+; V-NEXT: vmv2r.v v8, v12
+; V-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave_v4f64_v8f64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; ZIP-NEXT: vmv.v.i v0, 8
+; ZIP-NEXT: vsetivli zero, 4, e64, m4, ta, ma
+; ZIP-NEXT: vslidedown.vi v16, v8, 4
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v12, v8, 2
+; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; ZIP-NEXT: vmv.v.i v10, 12
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; ZIP-NEXT: vslideup.vi v14, v16, 2
+; ZIP-NEXT: vslideup.vi v14, v16, 1, v0.t
+; ZIP-NEXT: ri.vzip2a.vv v18, v8, v12
+; ZIP-NEXT: vmv1r.v v0, v10
+; ZIP-NEXT: vmerge.vvm v12, v18, v14, v0
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v14, v16, 2
+; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
+; ZIP-NEXT: vmv.v.i v0, 2
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
+; ZIP-NEXT: ri.vzip2a.vv v18, v16, v14
+; ZIP-NEXT: vslidedown.vi v14, v8, 1
+; ZIP-NEXT: vslidedown.vi v14, v8, 2, v0.t
+; ZIP-NEXT: vmv1r.v v0, v10
+; ZIP-NEXT: vmerge.vvm v10, v14, v18, v0
+; ZIP-NEXT: vmv2r.v v8, v12
+; ZIP-NEXT: ret
%retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec)
ret {<4 x double>, <4 x double>} %retval
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index e2f81acc9cf99..279779dc49667 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK,RV32 %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfh | FileCheck -check-prefixes=CHECK %s
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh | FileCheck %s --check-prefix=ZVBB
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zvfh,+experimental-xrivosvizip | FileCheck %s --check-prefix=ZIP
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zvfh,+experimental-xrivosvizip | FileCheck %s --check-prefix=ZIP
; Integers
@@ -41,6 +43,21 @@ define <32 x i1> @vector_interleave_v32i1_v16i1(<16 x i1> %a, <16 x i1> %b) {
; ZVBB-NEXT: vsetvli zero, a0, e8, m2, ta, ma
; ZVBB-NEXT: vmsne.vi v0, v12, 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v32i1_v16i1:
+; ZIP: # %bb.0:
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZIP-NEXT: vslideup.vi v0, v8, 2
+; ZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; ZIP-NEXT: vmv.v.i v8, 0
+; ZIP-NEXT: vmerge.vim v8, v8, 1, v0
+; ZIP-NEXT: vsetivli zero, 16, e8, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 16
+; ZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v12, v8, v10
+; ZIP-NEXT: vmsne.vi v0, v12, 0
+; ZIP-NEXT: ret
%res = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %a, <16 x i1> %b)
ret <32 x i1> %res
}
@@ -64,6 +81,14 @@ define <16 x i16> @vector_interleave_v16i16_v8i16(<8 x i16> %a, <8 x i16> %b) {
; ZVBB-NEXT: vwsll.vi v8, v10, 16
; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v16i16_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %a, <8 x i16> %b)
ret <16 x i16> %res
}
@@ -88,6 +113,14 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
; ZVBB-NEXT: vwsll.vx v8, v10, a0
; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v8i32_v4i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %a, <4 x i32> %b)
ret <8 x i32> %res
}
@@ -122,6 +155,14 @@ define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v4i64_v2i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
ret <4 x i64> %res
}
@@ -186,6 +227,36 @@ define <6 x i32> @vector_interleave3_v6i32_v2i32(<2 x i32> %a, <2 x i32> %b, <2
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave3_v6i32_v2i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: srli a1, a1, 1
+; ZIP-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; ZIP-NEXT: vsseg3e32.v v8, (a0)
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: vle32.v v9, (a2)
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: add a1, a2, a1
+; ZIP-NEXT: vle32.v v10, (a1)
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 2
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v10, 4
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call <6 x i32> @llvm.vector.interleave3.v6i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c)
ret <6 x i32> %res
}
@@ -265,6 +336,43 @@ define <10 x i16> @vector_interleave5_v10i16_v2i16(<2 x i16> %a, <2 x i16> %b, <
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave5_v10i16_v2i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: srli a1, a1, 2
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: add a3, a2, a1
+; ZIP-NEXT: vsetvli a4, zero, e16, mf4, ta, ma
+; ZIP-NEXT: vsseg5e16.v v8, (a0)
+; ZIP-NEXT: add a4, a3, a1
+; ZIP-NEXT: vle16.v v9, (a2)
+; ZIP-NEXT: vle16.v v11, (a4)
+; ZIP-NEXT: vle16.v v12, (a3)
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: add a1, a4, a1
+; ZIP-NEXT: vle16.v v10, (a1)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: vslideup.vi v12, v11, 2
+; ZIP-NEXT: vslideup.vi v8, v9, 2
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v12, 4
+; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v10, 8
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call <10 x i16> @llvm.vector.interleave5.v10i16(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x i16> %d, <2 x i16> %e)
ret <10 x i16> %res
}
@@ -353,6 +461,48 @@ define <14 x i8> @vector_interleave7_v14i8_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave7_v14i8_v2i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x01, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 1 * vlenb
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: srli a1, a1, 3
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: add a3, a2, a1
+; ZIP-NEXT: add a4, a3, a1
+; ZIP-NEXT: vsetvli a5, zero, e8, mf8, ta, ma
+; ZIP-NEXT: vsseg7e8.v v8, (a0)
+; ZIP-NEXT: vle8.v v9, (a4)
+; ZIP-NEXT: add a4, a4, a1
+; ZIP-NEXT: vle8.v v10, (a2)
+; ZIP-NEXT: add a2, a4, a1
+; ZIP-NEXT: add a1, a2, a1
+; ZIP-NEXT: vle8.v v11, (a2)
+; ZIP-NEXT: vle8.v v12, (a4)
+; ZIP-NEXT: vle8.v v8, (a0)
+; ZIP-NEXT: vle8.v v13, (a1)
+; ZIP-NEXT: vle8.v v14, (a3)
+; ZIP-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; ZIP-NEXT: vslideup.vi v12, v11, 2
+; ZIP-NEXT: vslideup.vi v8, v10, 2
+; ZIP-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; ZIP-NEXT: vslideup.vi v12, v13, 4
+; ZIP-NEXT: vslideup.vi v8, v14, 4
+; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 6
+; ZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v12, 8
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call <14 x i8> @llvm.vector.interleave7.v14i8(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d, <2 x i8> %e, <2 x i8> %f, <2 x i8> %g)
ret <14 x i8> %res
}
@@ -377,6 +527,13 @@ define <4 x half> @vector_interleave_v4f16_v2f16(<2 x half> %a, <2 x half> %b) {
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v4f16_v2f16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv1r.v v8, v10
+; ZIP-NEXT: ret
%res = call <4 x half> @llvm.vector.interleave2.v4f16(<2 x half> %a, <2 x half> %b)
ret <4 x half> %res
}
@@ -398,6 +555,13 @@ define <8 x half> @vector_interleave_v8f16_v4f16(<4 x half> %a, <4 x half> %b) {
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v8f16_v4f16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <8 x half> @llvm.vector.interleave2.v8f16(<4 x half> %a, <4 x half> %b)
ret <8 x half> %res
}
@@ -420,6 +584,13 @@ define <4 x float> @vector_interleave_v4f32_v2f32(<2 x float> %a, <2 x float> %b
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
; ZVBB-NEXT: vmv1r.v v8, v10
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v4f32_v2f32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <4 x float> @llvm.vector.interleave2.v4f32(<2 x float> %a, <2 x float> %b)
ret <4 x float> %res
}
@@ -443,6 +614,14 @@ define <16 x half> @vector_interleave_v16f16_v8f16(<8 x half> %a, <8 x half> %b)
; ZVBB-NEXT: vwsll.vi v8, v10, 16
; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v16f16_v8f16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <16 x half> @llvm.vector.interleave2.v16f16(<8 x half> %a, <8 x half> %b)
ret <16 x half> %res
}
@@ -467,6 +646,14 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
; ZVBB-NEXT: vwsll.vx v8, v10, a0
; ZVBB-NEXT: vwaddu.wv v8, v8, v11
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v8f32_v4f32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <8 x float> @llvm.vector.interleave2.v8f32(<4 x float> %a, <4 x float> %b)
ret <8 x float> %res
}
@@ -501,6 +688,14 @@ define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double>
; ZVBB-NEXT: vrgatherei16.vv v10, v8, v12
; ZVBB-NEXT: vmv.v.v v8, v10
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave_v4f64_v2f64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%res = call <4 x double> @llvm.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
ret <4 x double> %res
}
@@ -565,6 +760,36 @@ define <6 x float> @vector_interleave3_v632_v2f32(<2 x float> %a, <2 x float> %b
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave3_v632_v2f32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: srli a1, a1, 1
+; ZIP-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; ZIP-NEXT: vsseg3e32.v v8, (a0)
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: vle32.v v9, (a2)
+; ZIP-NEXT: vle32.v v8, (a0)
+; ZIP-NEXT: add a1, a2, a1
+; ZIP-NEXT: vle32.v v10, (a1)
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 2
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v10, 4
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call <6 x float> @llvm.vector.interleave3.v6f32(<2 x float> %a, <2 x float> %b, <2 x float> %c)
ret <6 x float> %res
}
@@ -644,6 +869,43 @@ define <10 x half> @vector_interleave5_v10f16_v2f16(<2 x half> %a, <2 x half> %b
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave5_v10f16_v2f16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: srli a1, a1, 2
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: add a3, a2, a1
+; ZIP-NEXT: vsetvli a4, zero, e16, mf4, ta, ma
+; ZIP-NEXT: vsseg5e16.v v8, (a0)
+; ZIP-NEXT: add a4, a3, a1
+; ZIP-NEXT: vle16.v v9, (a2)
+; ZIP-NEXT: vle16.v v11, (a4)
+; ZIP-NEXT: vle16.v v12, (a3)
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: add a1, a4, a1
+; ZIP-NEXT: vle16.v v10, (a1)
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: vslideup.vi v12, v11, 2
+; ZIP-NEXT: vslideup.vi v8, v9, 2
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v12, 4
+; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v10, 8
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call <10 x half> @llvm.vector.interleave5.v10f16(<2 x half> %a, <2 x half> %b, <2 x half> %c, <2 x half> %d, <2 x half> %e)
ret <10 x half> %res
}
@@ -736,9 +998,50 @@ define <7 x half> @vector_interleave7_v7f16_v1f16(<1 x half> %a, <1 x half> %b,
; ZVBB-NEXT: addi sp, sp, 16
; ZVBB-NEXT: .cfi_def_cfa_offset 0
; ZVBB-NEXT: ret
+;
+; ZIP-LABEL: vector_interleave7_v7f16_v1f16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: srli a1, a1, 2
+; ZIP-NEXT: add a2, a0, a1
+; ZIP-NEXT: add a3, a2, a1
+; ZIP-NEXT: add a4, a3, a1
+; ZIP-NEXT: vsetvli a5, zero, e16, mf4, ta, ma
+; ZIP-NEXT: vsseg7e16.v v8, (a0)
+; ZIP-NEXT: vle16.v v9, (a4)
+; ZIP-NEXT: add a4, a4, a1
+; ZIP-NEXT: vle16.v v10, (a2)
+; ZIP-NEXT: add a2, a4, a1
+; ZIP-NEXT: add a1, a2, a1
+; ZIP-NEXT: vle16.v v11, (a2)
+; ZIP-NEXT: vle16.v v12, (a4)
+; ZIP-NEXT: vle16.v v8, (a0)
+; ZIP-NEXT: vle16.v v13, (a1)
+; ZIP-NEXT: vle16.v v14, (a3)
+; ZIP-NEXT: vsetivli zero, 2, e16, mf2, tu, ma
+; ZIP-NEXT: vslideup.vi v12, v11, 1
+; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: vsetivli zero, 3, e16, mf2, tu, ma
+; ZIP-NEXT: vslideup.vi v12, v13, 2
+; ZIP-NEXT: vslideup.vi v8, v14, 2
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 3
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: vslideup.vi v8, v12, 4
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 1
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%res = call <7 x half> @llvm.vector.interleave7.v7f16(<1 x half> %a, <1 x half> %b, <1 x half> %c, <1 x half> %d, <1 x half> %e, <1 x half> %f, <1 x half> %g)
ret <7 x half> %res
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32: {{.*}}
-; RV64: {{.*}}