[llvm] f8ee58a - [RISCV] Initial codegen support for the XRivosVizip extension (#131933)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 29 15:25:59 PDT 2025
Author: Philip Reames
Date: 2025-03-29T15:25:56-07:00
New Revision: f8ee58a3cbbe4de2ac6d006fad8363ae6d24aa57
URL: https://github.com/llvm/llvm-project/commit/f8ee58a3cbbe4de2ac6d006fad8363ae6d24aa57
DIFF: https://github.com/llvm/llvm-project/commit/f8ee58a3cbbe4de2ac6d006fad8363ae6d24aa57.diff
LOG: [RISCV] Initial codegen support for the XRivosVizip extension (#131933)
This implements initial code generation support for a subset of the
XRivosVizip extension. Specifically, this adds support for vzipeven,
vzipodd, and vzip2a, but not vzip2b, vunzip2a, or vunzip2b. The others
will follow in separate patches.
One review note: The zipeven/zipodd matchers were recently rewritten to
better match upstream style, so careful review there would be
appreciated. The matchers don't yet support type coercion to wider
types. This will be done in a future patch.
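For readers unfamiliar with the idioms, here is a minimal LLVM IR sketch of
the three shuffles this patch learns to lower. The masks mirror the
zipeven_v4i32, zipodd_v4i32, and interleave_v4i32 tests updated below; the
function names are illustrative only:
; zipeven: even lanes of both sources, interleaved -> ri.vzipeven.vv
define <4 x i32> @zipeven_sketch(<4 x i32> %a, <4 x i32> %b) {
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %c
}
; zipodd: odd lanes of both sources, interleaved -> ri.vzipodd.vv
define <4 x i32> @zipodd_sketch(<4 x i32> %a, <4 x i32> %b) {
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x i32> %c
}
; zip2a: full interleave of the two sources -> ri.vzip2a.vv
define <8 x i32> @zip2a_sketch(<4 x i32> %a, <4 x i32> %b) {
  %c = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %c
}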
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index beca99c10ec72..a8c83113854c9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4563,8 +4563,10 @@ static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) {
/// way through the source.
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
int &OddSrc, const RISCVSubtarget &Subtarget) {
- // We need to be able to widen elements to the next larger integer type.
- if (VT.getScalarSizeInBits() >= Subtarget.getELen())
+ // We need to be able to widen elements to the next larger integer type or
+ // use the zip2a instruction at e64.
+ if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
+ !Subtarget.hasVendorXRivosVizip())
return false;
int Size = Mask.size();
@@ -4621,6 +4623,48 @@ static bool isElementRotate(const std::array<std::pair<int, int>, 2> &SrcInfo,
SrcInfo[1].second - SrcInfo[0].second == (int)NumElts;
}
+static bool isAlternating(const std::array<std::pair<int, int>, 2> &SrcInfo,
+ ArrayRef<int> Mask, bool RequiredPolarity) {
+ int NumElts = Mask.size();
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+ int Src = M >= NumElts;
+ int Diff = (int)i - (M % NumElts);
+ bool C = Src == SrcInfo[1].first && Diff == SrcInfo[1].second;
+ assert(C != (Src == SrcInfo[0].first && Diff == SrcInfo[0].second) &&
+ "Must match exactly one of the two slides");
+ if (RequiredPolarity != (C == i % 2))
+ return false;
+ }
+ return true;
+}
+
+/// Given a shuffle which can be represented as a pair of two slides,
+/// see if it is a zipeven idiom. Zipeven is:
+/// vs2: a0 a1 a2 a3
+/// vs1: b0 b1 b2 b3
+/// vd: a0 b0 a2 b2
+static bool isZipEven(const std::array<std::pair<int, int>, 2> &SrcInfo,
+ ArrayRef<int> Mask) {
+ return SrcInfo[0].second == 0 && SrcInfo[1].second == 1 &&
+ isAlternating(SrcInfo, Mask, true);
+}
+
+/// Given a shuffle which can be represented as a pair of two slides,
+/// see if it is a zipodd idiom. Zipodd is:
+/// vs2: a0 a1 a2 a3
+/// vs1: b0 b1 b2 b3
+/// vd: a1 b1 a3 b3
+/// Note that the operand order is swapped due to the way we canonicalize
+/// the slides, so SrcInfo[0] is vs1, and SrcInfo[1] is vs2.
+static bool isZipOdd(const std::array<std::pair<int, int>, 2> &SrcInfo,
+ ArrayRef<int> Mask) {
+ return SrcInfo[0].second == 0 && SrcInfo[1].second == -1 &&
+ isAlternating(SrcInfo, Mask, false);
+}
+
// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
// 2, 4, 8 and the integer type Factor-times larger than VT's
// element type must be a legal element type.
@@ -4880,6 +4924,34 @@ static bool isSpreadMask(ArrayRef<int> Mask, unsigned Factor, unsigned &Index) {
return true;
}
+static SDValue lowerVZIP(unsigned Opc, SDValue Op0, SDValue Op1,
+ const SDLoc &DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(RISCVISD::RI_VZIPEVEN_VL == Opc || RISCVISD::RI_VZIPODD_VL == Opc ||
+ RISCVISD::RI_VZIP2A_VL == Opc);
+ assert(Op0.getSimpleValueType() == Op1.getSimpleValueType());
+
+ MVT VT = Op0.getSimpleValueType();
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ Op0 = DAG.getBitcast(IntVT, Op0);
+ Op1 = DAG.getBitcast(IntVT, Op1);
+
+ MVT ContainerVT = IntVT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, IntVT, Subtarget);
+ Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(IntVT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Passthru = DAG.getUNDEF(ContainerVT);
+ SDValue Res = DAG.getNode(Opc, DL, ContainerVT, Op0, Op1, Passthru, Mask, VL);
+ if (IntVT.isFixedLengthVector())
+ Res = convertFromScalableVector(IntVT, Res, DAG, Subtarget);
+ Res = DAG.getBitcast(VT, Res);
+ return Res;
+}
+
// Given a vector a, b, c, d return a vector Factor times longer
// with Factor-1 undef's between elements. Ex:
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
@@ -5619,6 +5691,15 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
DAG.getVectorIdxConstant(OddSrc % Size, DL));
}
+ // Prefer vzip2a if available.
+ // TODO: Extend to matching zip2b if EvenSrc and OddSrc allow.
+ if (Subtarget.hasVendorXRivosVizip()) {
+ EvenV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+ EvenV, DAG.getVectorIdxConstant(0, DL));
+ OddV = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), OddV,
+ DAG.getVectorIdxConstant(0, DL));
+ return lowerVZIP(RISCVISD::RI_VZIP2A_VL, EvenV, OddV, DL, DAG, Subtarget);
+ }
return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
}
@@ -5670,6 +5751,18 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
+ if (Subtarget.hasVendorXRivosVizip() && isZipEven(SrcInfo, Mask)) {
+ SDValue Src1 = SrcInfo[0].first == 0 ? V1 : V2;
+ SDValue Src2 = SrcInfo[1].first == 0 ? V1 : V2;
+ return lowerVZIP(RISCVISD::RI_VZIPEVEN_VL, Src1, Src2, DL, DAG,
+ Subtarget);
+ }
+ if (Subtarget.hasVendorXRivosVizip() && isZipOdd(SrcInfo, Mask)) {
+ SDValue Src1 = SrcInfo[1].first == 0 ? V1 : V2;
+ SDValue Src2 = SrcInfo[0].first == 0 ? V1 : V2;
+ return lowerVZIP(RISCVISD::RI_VZIPODD_VL, Src1, Src2, DL, DAG, Subtarget);
+ }
+
// Build the mask. Note that vslideup unconditionally preserves elements
// below the slide amount in the destination, and thus those elements are
// undefined in the mask. If the mask ends up all true (or undef), it
@@ -6733,7 +6826,7 @@ static bool hasPassthruOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::VFMAX_VL)
@@ -6757,12 +6850,13 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(
- RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 127 &&
+ RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == 130 &&
RISCVISD::LAST_STRICTFP_OPCODE - RISCVISD::FIRST_STRICTFP_OPCODE == 21 &&
"adding target specific op should update this function");
if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
return true;
- if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
+ if (Opcode >= RISCVISD::VRGATHER_VX_VL &&
+ Opcode <= RISCVISD::LAST_VL_VECTOR_OP)
return true;
if (Opcode >= RISCVISD::STRICT_FADD_VL &&
Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
@@ -21807,6 +21901,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
NODE_NAME_CASE(VFIRST_VL)
+ NODE_NAME_CASE(RI_VZIPEVEN_VL)
+ NODE_NAME_CASE(RI_VZIPODD_VL)
+ NODE_NAME_CASE(RI_VZIP2A_VL)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index f4d6cd86397a4..5ebdbbd51f2b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -403,7 +403,12 @@ enum NodeType : unsigned {
// vfirst.m with additional mask and VL operands.
VFIRST_VL,
- LAST_VL_VECTOR_OP = VFIRST_VL,
+ // XRivosVizip
+ RI_VZIPEVEN_VL,
+ RI_VZIPODD_VL,
+ RI_VZIP2A_VL,
+
+ LAST_VL_VECTOR_OP = RI_VZIP2A_VL,
// Read VLENB CSR
READ_VLENB,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
index 78c4ed6f00412..3fe50503f937b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXRivos.td
@@ -67,6 +67,38 @@ defm RI_VUNZIP2A_V : VALU_IV_V<"ri.vunzip2a", 0b001000>;
defm RI_VUNZIP2B_V : VALU_IV_V<"ri.vunzip2b", 0b011000>;
}
+// These are modeled after the int binop VL nodes
+def ri_vzipeven_vl : SDNode<"RISCVISD::RI_VZIPEVEN_VL", SDT_RISCVIntBinOp_VL>;
+def ri_vzipodd_vl : SDNode<"RISCVISD::RI_VZIPODD_VL", SDT_RISCVIntBinOp_VL>;
+def ri_vzip2a_vl : SDNode<"RISCVISD::RI_VZIP2A_VL", SDT_RISCVIntBinOp_VL>;
+
+multiclass RIVPseudoVALU_VV {
+ foreach m = MxList in
+ defm "" : VPseudoBinaryV_VV<m, Commutable=0>;
+}
+
+let Predicates = [HasVendorXRivosVizip],
+ Constraints = "@earlyclobber $rd, $rd = $passthru" in {
+defm PseudoRI_VZIPEVEN : RIVPseudoVALU_VV;
+defm PseudoRI_VZIPODD : RIVPseudoVALU_VV;
+defm PseudoRI_VZIP2A : RIVPseudoVALU_VV;
+}
+
+multiclass RIVPatBinaryVL_VV<SDPatternOperator vop, string instruction_name,
+ list<VTypeInfo> vtilist = AllIntegerVectors,
+ bit isSEWAware = false> {
+ foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.RegClass, isSEWAware>;
+}
+
+defm : RIVPatBinaryVL_VV<ri_vzipeven_vl, "PseudoRI_VZIPEVEN">;
+defm : RIVPatBinaryVL_VV<ri_vzipodd_vl, "PseudoRI_VZIPODD">;
+defm : RIVPatBinaryVL_VV<ri_vzip2a_vl, "PseudoRI_VZIP2A">;
+
//===----------------------------------------------------------------------===//
// XRivosVisni
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
index 6ed288ff011e7..917613d5c786f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl128b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+zvl512b -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV32-ZIP
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,RV64-ZIP
; Test optimizing interleaves to widening arithmetic.
@@ -15,6 +17,13 @@ define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: interleave_v2i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv1r.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i8> %a
}
@@ -28,6 +37,13 @@ define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: interleave_v2i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv1r.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i16> %a
}
@@ -42,6 +58,13 @@ define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) {
; CHECK-NEXT: vwmaccu.vx v10, a0, v8
; CHECK-NEXT: vmv1r.v v8, v10
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: interleave_v2i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
ret <4 x i32> %a
}
@@ -72,6 +95,14 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
; V512-NEXT: vslideup.vi v11, v8, 1
; V512-NEXT: vmerge.vvm v8, v11, v10, v0
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v2i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i64> %a
}
@@ -95,6 +126,13 @@ define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v8
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v4i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v9, v8
+; ZIP-NEXT: vmv1r.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
ret <8 x i8> %a
}
@@ -118,6 +156,13 @@ define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 4, i32 undef, i32 5, i32 2, i32 undef, i32 3, i32 7>
ret <8 x i16> %a
}
@@ -141,6 +186,14 @@ define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v4i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i32> %a
}
@@ -167,6 +220,15 @@ define <4 x i32> @interleave_v4i32_offset_2(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vwmaccu.vx v9, a0, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v4i32_offset_2:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v9, 2
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 6, i32 1, i32 7>
ret <4 x i32> %a
}
@@ -198,6 +260,17 @@ define <4 x i32> @interleave_v4i32_offset_1(<4 x i32> %x, <4 x i32> %y) {
; V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
; V512-NEXT: vmerge.vvm v8, v9, v10, v0
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v4i32_offset_1:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, mu
+; ZIP-NEXT: vmv.v.i v0, 8
+; ZIP-NEXT: vmv1r.v v10, v9
+; ZIP-NEXT: vslideup.vi v10, v9, 1, v0.t
+; ZIP-NEXT: vmv.v.i v0, 10
+; ZIP-NEXT: ri.vzip2a.vv v11, v8, v9
+; ZIP-NEXT: vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 5, i32 1, i32 6>
ret <4 x i32> %a
}
@@ -220,6 +293,13 @@ define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v8i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i8> %a
}
@@ -244,6 +324,14 @@ define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v8
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v12, v8
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
ret <16 x i16> %a
}
@@ -267,6 +355,14 @@ define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v8i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; ZIP-NEXT: vmv2r.v v16, v10
+; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
%a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
ret <16 x i32> %a
}
@@ -290,6 +386,16 @@ define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v16i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i8> %a
}
@@ -313,6 +419,16 @@ define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v16i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZIP-NEXT: vmv2r.v v16, v10
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
%a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i16> %a
}
@@ -337,6 +453,16 @@ define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
; V512-NEXT: li a0, -1
; V512-NEXT: vwmaccu.vx v8, a0, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v16i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZIP-NEXT: vmv4r.v v24, v12
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24
+; ZIP-NEXT: vmv.v.v v8, v16
+; ZIP-NEXT: ret
%a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
ret <32 x i32> %a
}
@@ -363,6 +489,16 @@ define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v32i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZIP-NEXT: vmv2r.v v16, v10
+; ZIP-NEXT: li a0, 64
+; ZIP-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v12, v8, v16
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
%a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i8> %a
}
@@ -391,6 +527,16 @@ define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
; V512-NEXT: li a0, -1
; V512-NEXT: vwmaccu.vx v8, a0, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v32i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; ZIP-NEXT: vmv4r.v v24, v12
+; ZIP-NEXT: li a0, 64
+; ZIP-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24
+; ZIP-NEXT: vmv.v.v v8, v16
+; ZIP-NEXT: ret
%a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i16> %a
}
@@ -446,6 +592,78 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; V512-NEXT: li a0, -1
; V512-NEXT: vwmaccu.vx v8, a0, v12
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_v32i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: addi sp, sp, -16
+; ZIP-NEXT: .cfi_def_cfa_offset 16
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 5
+; ZIP-NEXT: sub sp, sp, a0
+; ZIP-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: li a1, 24
+; ZIP-NEXT: mul a0, a0, a1
+; ZIP-NEXT: add a0, sp, a0
+; ZIP-NEXT: addi a0, a0, 16
+; ZIP-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZIP-NEXT: vslidedown.vi v24, v8, 16
+; ZIP-NEXT: li a0, 32
+; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v16, v24, v0
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: li a2, 24
+; ZIP-NEXT: mul a1, a1, a2
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: vsetivli zero, 16, e32, m8, ta, ma
+; ZIP-NEXT: vslidedown.vi v24, v24, 16
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: slli a1, a1, 4
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vs8r.v v24, (a1) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: lui a1, 699051
+; ZIP-NEXT: addi a1, a1, -1366
+; ZIP-NEXT: vmv.s.x v0, a1
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: slli a1, a1, 3
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vs8r.v v8, (a1) # vscale x 64-byte Folded Spill
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: slli a1, a1, 4
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vl8r.v v24, (a1) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: csrr a1, vlenb
+; ZIP-NEXT: slli a1, a1, 3
+; ZIP-NEXT: add a1, sp, a1
+; ZIP-NEXT: addi a1, a1, 16
+; ZIP-NEXT: vl8r.v v8, (a1) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: vsetvli zero, a0, e32, m8, ta, mu
+; ZIP-NEXT: ri.vzip2a.vv v16, v8, v24, v0.t
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: li a1, 24
+; ZIP-NEXT: mul a0, a0, a1
+; ZIP-NEXT: add a0, sp, a0
+; ZIP-NEXT: addi a0, a0, 16
+; ZIP-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: addi a0, sp, 16
+; ZIP-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
+; ZIP-NEXT: ri.vzip2a.vv v0, v8, v24
+; ZIP-NEXT: vmv.v.v v8, v0
+; ZIP-NEXT: csrr a0, vlenb
+; ZIP-NEXT: slli a0, a0, 5
+; ZIP-NEXT: add sp, sp, a0
+; ZIP-NEXT: .cfi_def_cfa sp, 16
+; ZIP-NEXT: addi sp, sp, 16
+; ZIP-NEXT: .cfi_def_cfa_offset 0
+; ZIP-NEXT: ret
%a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
ret <64 x i32> %a
}
@@ -471,6 +689,15 @@ define <4 x i8> @unary_interleave_v4i8(<4 x i8> %x) {
; V512-NEXT: vwmaccu.vx v9, a0, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v4i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZIP-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
+; ZIP-NEXT: vmv1r.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i8> %a
}
@@ -498,6 +725,17 @@ define <4 x i8> @unary_interleave_v4i8_invalid(<4 x i8> %x) {
; V512-NEXT: vrgather.vv v9, v8, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v4i8_invalid:
+; ZIP: # %bb.0:
+; ZIP-NEXT: lui a0, 16
+; ZIP-NEXT: addi a0, a0, 768
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: vmv.s.x v10, a0
+; ZIP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
+; ZIP-NEXT: vrgather.vv v9, v8, v10
+; ZIP-NEXT: vmv1r.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 4>
ret <4 x i8> %a
}
@@ -523,6 +761,15 @@ define <4 x i16> @unary_interleave_v4i16(<4 x i16> %x) {
; V512-NEXT: vwmaccu.vx v9, a0, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v4i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e16, mf2, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
+; ZIP-NEXT: vmv1r.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i16> %a
}
@@ -548,6 +795,15 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
; V512-NEXT: vwmaccu.vx v9, a0, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v4i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 2
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i32> %a
}
@@ -590,6 +846,15 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
; RV64-V512-NEXT: vrgather.vv v9, v8, v10
; RV64-V512-NEXT: vmv.v.v v8, v9
; RV64-V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v4i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v12, v8, 2
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
ret <4 x i64> %a
}
@@ -615,6 +880,15 @@ define <8 x i8> @unary_interleave_v8i8(<8 x i8> %x) {
; V512-NEXT: vwmaccu.vx v9, a0, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v8i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 4
+; ZIP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v9, v8, v10
+; ZIP-NEXT: vmv1r.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <8 x i8> %x, <8 x i8> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 undef, i32 6, i32 3, i32 7>
ret <8 x i8> %a
}
@@ -640,6 +914,15 @@ define <8 x i16> @unary_interleave_v8i16(<8 x i16> %x) {
; V512-NEXT: vwmaccu.vx v9, a0, v8
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v8i16:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e16, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v10, v8, 4
+; ZIP-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v9, v10, v8
+; ZIP-NEXT: vmv.v.v v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 4, i32 undef, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
ret <8 x i16> %a
}
@@ -665,6 +948,15 @@ define <8 x i32> @unary_interleave_v8i32(<8 x i32> %x) {
; V512-NEXT: vwmaccu.vx v9, a0, v10
; V512-NEXT: vmv1r.v v8, v9
; V512-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_v8i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; ZIP-NEXT: vslidedown.vi v12, v8, 4
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
%a = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
ret <8 x i32> %a
}
@@ -679,6 +971,14 @@ define <4 x i8> @unary_interleave_10uu_v4i8(<4 x i8> %x) {
; CHECK-NEXT: vsll.vi v8, v8, 8
; CHECK-NEXT: vor.vv v8, v8, v9
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: unary_interleave_10uu_v4i8:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZIP-NEXT: vsrl.vi v9, v8, 8
+; ZIP-NEXT: vsll.vi v8, v8, 8
+; ZIP-NEXT: vor.vv v8, v8, v9
+; ZIP-NEXT: ret
%a = shufflevector <4 x i8> %x, <4 x i8> poison, <4 x i32> <i32 1, i32 0, i32 undef, i32 undef>
ret <4 x i8> %a
}
@@ -702,6 +1002,14 @@ define <16 x i16> @interleave_slp(<8 x i16> %v0, <8 x i16> %v1) {
; V512-NEXT: vwmaccu.vx v10, a0, v9
; V512-NEXT: vmv1r.v v8, v10
; V512-NEXT: ret
+;
+; ZIP-LABEL: interleave_slp:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZIP-NEXT: vmv1r.v v12, v9
+; ZIP-NEXT: ri.vzip2a.vv v10, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%v2 = shufflevector <8 x i16> %v0, <8 x i16> poison, <16 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 3, i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 7, i32 undef>
%v3 = shufflevector <8 x i16> %v1, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -711,4 +1019,6 @@ entry:
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32-V128: {{.*}}
+; RV32-ZIP: {{.*}}
; RV64-V128: {{.*}}
+; RV64-ZIP: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
index c97f11301a05a..0a442940366e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-zipeven-zipodd.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+experimental-xrivosvizip -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZIP,ZIP-RV64
define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: zipeven_v4i32:
@@ -9,6 +11,13 @@ define <4 x i32> @zipeven_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vmv.v.i v0, 10
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4i32:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i32> %c
@@ -22,6 +31,13 @@ define <4 x i32> @zipeven_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4i32_swapped:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v10, v9, v8
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
ret <4 x i32> %c
@@ -35,6 +51,13 @@ define <4 x i64> @zipeven_v4i64(<4 x i64> %a, <4 x i64> %b) {
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4i64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x i64> %c
@@ -47,6 +70,13 @@ define <4 x half> @zipeven_v4f16(<4 x half> %a, <4 x half> %b) {
; CHECK-NEXT: vmv.v.i v0, 10
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4f16:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9
+; ZIP-NEXT: vmv1r.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x half> %c
@@ -59,6 +89,13 @@ define <4 x float> @zipeven_v4f32(<4 x float> %a, <4 x float> %b) {
; CHECK-NEXT: vmv.v.i v0, 10
; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4f32:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x float> %c
@@ -72,6 +109,13 @@ define <4 x double> @zipeven_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4f64:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
ret <4 x double> %c
@@ -86,6 +130,13 @@ define <4 x i32> @zipodd_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v4i32:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
ret <4 x i32> %c
@@ -98,6 +149,13 @@ define <4 x i32> @zipodd_v4i32_swapped(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: vslidedown.vi v8, v9, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v4i32_swapped:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v10, v9, v8
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
ret <4 x i32> %c
@@ -110,6 +168,10 @@ define <4 x i32> @zipeven_v4i32_single(<4 x i32> %a) {
; CHECK-LABEL: zipeven_v4i32_single:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4i32_single:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
ret <4 x i32> %c
@@ -124,6 +186,12 @@ define <4 x i32> @zipodd_v4i32_single(<4 x i32> %a) {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslidedown.vi v8, v8, 1
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v4i32_single:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: vslidedown.vi v8, v8, 1
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
ret <4 x i32> %c
@@ -136,6 +204,13 @@ define <4 x i32> @zipodd_v4i32_both(<4 x i32> %a) {
; CHECK-NEXT: vmv.v.i v0, 5
; CHECK-NEXT: vslidedown.vi v8, v8, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v4i32_both:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v9, v8, v8
+; ZIP-NEXT: vmv.v.v v8, v9
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
ret <4 x i32> %c
@@ -150,6 +225,13 @@ define <4 x i32> @zipeven_v4i32_both(<4 x i32> %a) {
; CHECK-NEXT: vslideup.vi v9, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4i32_both:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v9, v8, v8
+; ZIP-NEXT: vmv.v.v v8, v9
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
ret <4 x i32> %c
@@ -161,6 +243,12 @@ define <4 x i32> @zipeven_v4i32_partial(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v4i32_partial:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; ZIP-NEXT: vslideup.vi v8, v9, 1
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 poison>
ret <4 x i32> %c
@@ -174,6 +262,13 @@ define <4 x i32> @zipodd_v4i32_partial(<4 x i32> %a, <4 x i32> %b) {
; CHECK-NEXT: vslidedown.vi v9, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v9
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v4i32_partial:
+; ZIP: # %bb.0: # %entry
+; ZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v10, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: ret
entry:
%c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 poison>
ret <4 x i32> %c
@@ -187,6 +282,13 @@ define <8 x i32> @zipeven_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslideup.vi v8, v10, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v8i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v12, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x i32> %out
}
@@ -200,6 +302,13 @@ define <8 x i32> @zipodd_v8i32(<8 x i32> %v1, <8 x i32> %v2) {
; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v8i32:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v12, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: ret
%out = shufflevector <8 x i32> %v1, <8 x i32> %v2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x i32> %out
}
@@ -213,6 +322,13 @@ define <16 x i64> @zipeven_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vslideup.vi v8, v16, 1, v0.t
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipeven_v16i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZIP-NEXT: ri.vzipeven.vv v24, v8, v16
+; ZIP-NEXT: vmv.v.v v8, v24
+; ZIP-NEXT: ret
%out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
ret <16 x i64> %out
}
@@ -227,9 +343,18 @@ define <16 x i64> @zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) {
; CHECK-NEXT: vslidedown.vi v16, v8, 1, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
+;
+; ZIP-LABEL: zipodd_v16i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; ZIP-NEXT: ri.vzipodd.vv v24, v8, v16
+; ZIP-NEXT: vmv.v.v v8, v24
+; ZIP-NEXT: ret
%out = shufflevector <16 x i64> %v1, <16 x i64> %v2, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
ret <16 x i64> %out
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}
+; ZIP-RV32: {{.*}}
+; ZIP-RV64: {{.*}}